#clear environment
rm(list=ls())
# state packages
packages <- c("devtools",
"qiime2R", #github download
"microbiome", #github download
"tidyverse",
"vegan",
"ggforce",
"phyloseq",
"cowplot",
"geosphere",
'lme4',
"car",
"sf",
"pROC", #ROC and AUC for model validation
"ncdf4",
"raster",
"lubridate",
"terra",
"geodata",
"predicts",
"sdmpredictors",
"tidyterra",
"exactextractr",
"arrow",
"here",
"emodnet.wfs", #github
"biooracler",#github
"biomod2",
"dismo",
"xgboost",
"caret",
"gam",
"mda",
"earth",
"maxnet",
"randomForest",
"RODBC",
"odbc")
# Install any missing packages along with their dependencies
new.packages <- packages[!(packages %in% installed.packages()[, "Package"])]
if (length(new.packages)) install.packages(new.packages, dependencies = TRUE)
# Load all packages
invisible(lapply(packages, library, character.only = TRUE))Consistent ensemble predictions of intertidal barnacle distributions across visual and eDNA-based occurrence data
Introduction
This pipeline replicates all analyses in Simons et al (2025) “Consistent ensemble predictions of intertidal barnacle distributions across visual and eDNA-based occurrence data”.
Set-up
Load packages
Functions
snap_to_nearest_valid <- function(environment_stack, occurance_data) {
# Select lon/lat columns using dplyr
coords <- occurance_data %>% select(c(lon, lat))
# Convert to SpatialPoints for raster extraction
pts <- SpatialPoints(coords, proj4string = crs(environment_stack))
# Extract raster values at occurrence points
vals <- raster::extract(environment_stack, pts)
# Identify points on any NA across layers
na_points <- apply(vals, 1, function(x) any(is.na(x)))
# Identify valid cells (no NAs in any layer)
valid_cells <- Which(
!is.na(environment_stack[[1]]) & !is.na(environment_stack[[2]]) & !is.na(environment_stack[[3]]),
cells = TRUE
)
valid_xy <- xyFromCell(environment_stack, valid_cells)
# Initialize corrected coordinates
corrected_coords <- coords
# Snap only points on NA cells
if(any(na_points)) {
nn <- FNN::get.knnx(valid_xy, coords[na_points, , drop = FALSE], k = 1)
corrected_coords[na_points, ] <- valid_xy[nn$nn.index, ]
}
return(corrected_coords)
}g_legend<-function(a.gplot){
tmp <- ggplot_gtable(ggplot_build(a.gplot))
leg <- which(sapply(tmp$grobs, function(x) x$name) == "guide-box")
legend <- tmp$grobs[[leg]]
}Set seed
This is to make sure the random elements in models start from the same point every time. This will make our results reproducible. This is also done before any randomized element such as before each model.
seed.val.set = 42 # life, the universe, and everything
options(biomod2.parallel = FALSE) # forces everything to run sequentially, so seeds behave more predictablyOccurrence data
Import paired eDNA and MarClim occurrences
Let’s first read in our occurrences and metadata.
# occurance data
occurance_data <- read.csv(file = "Data/Occurrence_data/my_dataset/combined_methods_long_data.csv", row.names = 1)
# get lat long from meta data
metadata <- read.csv(file = "Data/Metadata/metadata.csv", na.strings = c(""))
metadata <- metadata %>%
subset(select = c(localityID, exposure, decimalLongitude, decimalLatitude)) %>% unique() %>% na.omit()
# add lat long to occurance
occurance_data <- left_join(occurance_data, metadata)
# filter to only species of interest
species_list <- c(
"Semibalanus balanoides",
"Chthamalus montagui"
)
occurance_data_reduced <- occurance_data %>% filter(taxa %in% species_list)
str(occurance_data_reduced)'data.frame': 116 obs. of 15 variables:
$ taxa : chr "Chthamalus montagui" "Chthamalus montagui" "Chthamalus montagui" "Chthamalus montagui" ...
$ localityID : chr "CN01" "CN02" "CN03" "CN04" ...
$ count : num 7 7 7 6 7 ...
$ region : chr "Southwest England" "Southwest England" "Southwest England" "Southwest England" ...
$ genus : chr "Chthamalus" "Chthamalus" "Chthamalus" "Chthamalus" ...
$ site : chr "Lizard Point" "Sennen Cove" "Looe" "St Ives" ...
$ phylum : chr "Arthropoda" "Arthropoda" "Arthropoda" "Arthropoda" ...
$ method : chr "Visual" "Visual" "Visual" "Visual" ...
$ pa : int 1 1 1 1 1 0 0 0 0 0 ...
$ num_samples : int NA NA NA NA NA NA NA NA NA NA ...
$ num_samples_found : int NA NA NA NA NA NA NA NA NA NA ...
$ prop_samples_found: num NA NA NA NA NA NA NA NA NA NA ...
$ exposure : chr "Sheltered" "Moderatley exposed" "Moderatley exposed" "Exposed" ...
$ decimalLongitude : num -5.21 -5.71 -4.46 -5.48 -4.99 ...
$ decimalLatitude : num 50 50.1 50.3 50.2 50.5 ...
Split into species of interest for combined and individual methods.
# Clean species names to use in object names (e.g., "Semibalanus balanoides" -> "semibalanus")
clean_name <- function(name) {
tolower(gsub(" ", "_", name))
}
# Loop through each species and create the filtered data frames
for (species in species_list) {
clean <- clean_name(species)
assign(paste0(clean, "_obs_eDNA"),
occurance_data_reduced %>%
filter(taxa == species, method == "eDNA", pa == 1) %>%
select(taxa, decimalLongitude, decimalLatitude) %>%
rename(lat = decimalLatitude, lon = decimalLongitude, species = taxa))
assign(paste0(clean, "_obs_visual"),
occurance_data_reduced %>%
filter(taxa == species, method == "Visual", pa == 1) %>%
select(taxa, decimalLongitude, decimalLatitude) %>%
rename(lat = decimalLatitude, lon = decimalLongitude, species = taxa))
}
# Check the data to make sure it loaded correctly
head(semibalanus_balanoides_obs_eDNA) species lon lat
1 Semibalanus balanoides -5.7100 50.0780
2 Semibalanus balanoides -4.4580 50.3410
3 Semibalanus balanoides -5.4751 50.2178
4 Semibalanus balanoides -4.9850 50.5450
5 Semibalanus balanoides -0.2609 54.2158
6 Semibalanus balanoides -0.4079 54.3014
# Determine geographic extent of our data
# find general latitudinal and longitudinal boundaries
max_lat <- ceiling(max(occurance_data_reduced$decimalLatitude))
min_lat <- floor(min(occurance_data_reduced$decimalLatitude))
max_lon <- ceiling(max(occurance_data_reduced$decimalLongitude))
min_lon <- floor(min(occurance_data_reduced$decimalLongitude))
# Store boundaries in a single extent object
geographic_extent <- ext(x = c(min_lon, max_lon, min_lat, max_lat))
uk_extent <- ext(-11, 2, 49.5, 61) # Westernmost to easternmost, southernmost to northernmost
uk_extent_match <- ext(-10.8,2,49.4,60.8)
# plot features
wrld <- world(path=".")Import wider MarClim occurrences
MarClim has a broader range of sites than just the sites matched to our eDNA samples. We can use this as an evaluation data set because it contains close to true absence and presence for the sampled sites. We are going to stick to 2022 - 2023 period to keep things simple.
# read
marclim_broad <- read.csv(file = "Data/Occurrence_data/my_dataset/MarClim_SACFOR_22_23.csv")
# filter for just semibalanus and chthamalus
marclim_broad_sp <- marclim_broad %>%
select(Semibalanus.balanoides,
Chthamalus.montagui,
Lat..WGS84.,
Long..WGS84.)
#convert to PA
marclim_broad_sp_PA <- marclim_broad_sp %>%
mutate(across(
c(Semibalanus.balanoides, Chthamalus.montagui),
~ ifelse(. %in% c("NR", "NS", "?", ""), 0, 1)
)) %>% na.omit()
# tidy labels
marclim_broad_sp_PA <- marclim_broad_sp_PA %>%
rename(
lat = Lat..WGS84.,
lon = Long..WGS84.
)
#only within uK extent
marclim_broad_sp_PA <- subset(marclim_broad_sp_PA,
lon >= -10.8 & lon <= 2 &
lat >= 49.72 & lat <= 60.8
)
# make individual species presence only datasets for maps
semibalanus_marclim_broad <- marclim_broad_sp_PA %>%
select(-c(Chthamalus.montagui)) %>%
filter(Semibalanus.balanoides == 1) %>%
mutate(species = "Semibalanus balanoides") %>%
select(-c(Semibalanus.balanoides))
chthamalus_marclim_broad <- marclim_broad_sp_PA %>%
select(-c(Semibalanus.balanoides)) %>%
filter(Chthamalus.montagui == 1) %>%
mutate(species = "Chthamalus montagui") %>%
select(-c(Chthamalus.montagui))Import GBIF occurrences
We’ve downloaded previously (the # code) then re-imported to save time and memory.
# download all obs from 2000 onwards
minYear = 2000
#semibalanus_GBIF_obs <- geodata::sp_occurrence("semibalanus", "balanoides", geo =
#TRUE, ext = uk_extent)
#dups <- duplicated(semibalanus_GBIF_obs[, c('lon', 'lat')])
#semibalanus_GBIF_obs_nodups <- semibalanus_GBIF_obs[!dups, ]
#semibalanus_GBIF_obs_nodups_recent <- subset(semibalanus_GBIF_obs_nodups, year >= minYear)
# Seperate the baseline data (2000 - 2021)
baselineYears <- c(2000:2021)
#semibalanus_GBIF_base <- semibalanus_GBIF_obs_nodups_recent %>%
#filter(year %in% baselineYears,
#country %in% c("United Kingdom", "Isle of Man")) %>% #filter out Ireland
#filter(!(lon > -8.2 & lon < -5.4 & lat > 54.0 & lat < 55.3)) # filter out
#write.csv(semibalanus_GBIF_base, file = paste("Data/Occurrence_data/GBIF/semibalanus_GBIF_base",minYear,".csv", sep = ""))
semibalanus_GBIF_base <- read.csv("Data/Occurrence_data/GBIF/semibalanus_GBIF_base2000.csv")
# 673 obs for this period
# Seperate the additional data (2022 - 2023)
#semibalanus_GBIF_additional <- semibalanus_GBIF_obs_nodups_recent %>%
#filter(year %in% c(2022, 2023),
#country %in% c("United Kingdom", "Isle of Man")) %>% #filter out Ireland
#filter(!(lon > -8.2 & lon < -5.4 & lat > 54.0 & lat < 55.3)) # filter out Northern Ireland
#write.csv(semibalanus_GBIF_additional, file = paste("Data/Occurrence_data/GBIF/semibalanus_GBIF_additional_2022.csv", sep = ""))
semibalanus_GBIF_additional <- read.csv("Data/Occurrence_data/GBIF/semibalanus_GBIF_additional_2022.csv")
# obs for additonal observations# download all obs from 2000 onwards
#chthamalus_GBIF_obs <- geodata::sp_occurrence("chthamalus", "montagui", geo =
#TRUE, ext = uk_extent)
#dups <- duplicated(chthamalus_GBIF_obs[, c('lon', 'lat')])
#chthamalus_GBIF_obs_nodups <- chthamalus_GBIF_obs[!dups, ]
#chthamalus_GBIF_obs_nodups_recent <- subset(chthamalus_GBIF_obs_nodups, year >= minYear)
# Seperate the baseline data (2000 - 2021)
#chthamalus_GBIF_base <- chthamalus_GBIF_obs_nodups_recent %>%
#filter(year %in% baselineYears,
#country %in% c("United Kingdom", "Isle of Man")) %>% #filter out Ireland
#filter(!(lon > -8.2 & lon < -5.4 & lat > 54.0 & lat < 55.3)) # filter out
#write.csv(chthamalus_GBIF_base, file = paste("Data/Occurrence_data/GBIF/chthamalus_GBIF_base",minYear,".csv", sep = ""))
chthamalus_GBIF_base <- read.csv("Data/Occurrence_data/GBIF/chthamalus_GBIF_base2000.csv")
# 578 obs for this period
# Seperate the additional data (2022 - 2023)
#chthamalus_GBIF_additional <- chthamalus_GBIF_obs_nodups_recent %>%
#filter(year %in% c(2022, 2023),
#country %in% c("United Kingdom", "Isle of Man")) %>% #filter out Ireland
#filter(!(lon > -8.2 & lon < -5.4 & lat > 54.0 & lat < 55.3)) # filter out Northern Ireland
#write.csv(chthamalus_GBIF_additional, file = paste("Data/Occurrence_data/GBIF/chthamalus_GBIF_additional_2022.csv", sep = ""))
chthamalus_GBIF_additional <- read.csv("Data/Occurrence_data/GBIF/chthamalus_GBIF_additional_2022.csv")
# obs for additonal observationsCreate data sets for model
We need the following versions of our data for each species:
MarClim alone (as the evaluation data set)
GBIF base (2000 - 2021) alone
GBIF base (2000 - 2021) + GBIF additional (2022 - 2023)
GBIF base (2000 - 2021) + eDNA additional (2022 - 2023)
# MarClim alone - 218 obs
semibalanus_MarClim <- semibalanus_marclim_broad %>%
mutate(species = as.factor(species),
source = "MarClim") %>%
select(species, lat, lon, source)
# GBIF base (2000 - 2021) + GBIF (2022 - 2023) - 1649 obs
semibalanus_GBIF_base_clean <- semibalanus_GBIF_base %>%
mutate(species = as.factor(species),
source = "GBIF_base") %>%
select(species, lat, lon, source)
semibalanus_GBIF_baseline <- semibalanus_GBIF_base_clean
semibalanus_GBIF_additional_clean <- semibalanus_GBIF_additional %>%
mutate(species = as.factor(species),
source = "GBIF_additional") %>%
select(species, lat, lon, source)
semibalanus_GBIF_GBIF <- rbind(semibalanus_GBIF_base_clean, semibalanus_GBIF_additional_clean)
# GBIF base data (2000 - 2021) + eDNA (2022 - 2023) - 1631 obs
semibalanus_balanoides_obs_eDNA_clean <- semibalanus_balanoides_obs_eDNA %>%
mutate(species = as.factor(species),
source = "eDNA") %>%
select(species, lat, lon, source)
semibalanus_GBIF_eDNA <- rbind(semibalanus_GBIF_base_clean, semibalanus_balanoides_obs_eDNA_clean)
# we can make a full version for our map
semibalanus_all_sources <- rbind(
semibalanus_MarClim,
semibalanus_GBIF_base_clean,
semibalanus_GBIF_additional_clean,
semibalanus_balanoides_obs_eDNA_clean
)Let’s check to make sure they worked correctly.
dim(semibalanus_MarClim) # marclim alone[1] 218 4
dim(semibalanus_GBIF_baseline) # GBIF baseline[1] 1611 4
dim(semibalanus_GBIF_GBIF) #GBIF base + GBIF[1] 1649 4
dim(semibalanus_GBIF_eDNA) #GBIF base + eDNA[1] 1631 4
dim(semibalanus_all_sources) # all[1] 1887 4
Same again for chthamalus.
# MarClim alone - 145 obs
chthamalus_MarClim <- chthamalus_marclim_broad %>%
mutate(species = as.factor(species),
source = "MarClim") %>%
select(species, lat, lon, source)
# GBIF base (2000 - 2021) + GBIF (2022 - 2023) - 605 obs
chthamalus_GBIF_base_clean <- chthamalus_GBIF_base %>%
mutate(species = as.factor(species),
source = "GBIF_base") %>%
select(species, lat, lon, source)
chthamalus_GBIF_baseline <- chthamalus_GBIF_base_clean
chthamalus_GBIF_additional_clean <- chthamalus_GBIF_additional %>%
mutate(species = as.factor(species),
source = "GBIF_additional") %>%
select(species, lat, lon, source)
chthamalus_GBIF_GBIF <- rbind(chthamalus_GBIF_base_clean, chthamalus_GBIF_additional_clean)
# GBIF base data (2000 - 2021) + eDNA (2022 - 2023) - 589 obs
chthamalus_montagui_obs_eDNA_clean <- chthamalus_montagui_obs_eDNA %>%
mutate(species = as.factor(species),
source = "eDNA") %>%
select(species, lat, lon, source)
chthamalus_GBIF_eDNA <- rbind(chthamalus_GBIF_base_clean, chthamalus_montagui_obs_eDNA_clean)
# we can make a full version for our map
chthamalus_all_sources <- rbind(
chthamalus_MarClim,
chthamalus_GBIF_base_clean,
chthamalus_GBIF_additional_clean,
chthamalus_montagui_obs_eDNA_clean
)Let’s check to make sure they worked correctly.
dim(chthamalus_MarClim) # marclim alone[1] 145 4
dim(chthamalus_GBIF_baseline) #GBIF baseline[1] 578 4
dim(chthamalus_GBIF_GBIF) #GBIF base + GBIF[1] 605 4
dim(chthamalus_GBIF_eDNA) #GBIF base + eDNA[1] 589 4
dim(chthamalus_all_sources) # all[1] 761 4
Visualize occurrences
Let’s plot our occurrences on a map.
# Get UK boundaries at admin level 1 (countries/subunits)
uk <- rnaturalearth::ne_countries(country = c("United Kingdom"), returnclass = "sf", scale = "large")
northern_ireland <- rnaturalearth::ne_states(country = "United Kingdom", returnclass = "sf") %>%
filter(geonunit == "Northern Ireland") %>%
st_union() %>% # merge all the small pieces
st_as_sf() # back to sf object
other_maps <- rnaturalearth::ne_countries(country = c("Ireland", "Isle of Man", "France"), returnclass = "sf", scale = "large")# colours
GBIF_colour = "#C1292E"
MarClim_colour = "darkgreen"
eDNA_colour= "#235789"
GBIF_base_colour = "#FDE235"semibalanus_map <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "grey") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
# Add all points except MarClim
geom_jitter(data = subset(semibalanus_all_sources, source != "MarClim"),
aes(x = lon, y = lat, color = source, shape = source),
size = 1.1, width = 0.025, height = 0.025, alpha = 0.6) +
coord_sf(xlim = c(-8, 3), ylim = c(49.7, 61)) +
theme_minimal() +
labs(x = "Longitude", y = "Latitude", color = "Data source") +
scale_color_manual(values = c(
"GBIF_additional" = GBIF_colour,
"eDNA" = eDNA_colour,
"GBIF_base" = GBIF_base_colour
)) +
scale_shape_manual(values = c(
"GBIF_additional" = 17, # solid triangle
"eDNA" = 15, # solid square
"GBIF_base" = 19)) +
theme(legend.position = "none")
semibalanus_mapchthamalus_map <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "grey") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
# Add all points except MarClim
geom_jitter(data = subset(chthamalus_all_sources, source != "MarClim"),
aes(x = lon, y = lat, color = source, shape = source),
size = 1.1, width = 0.025, height = 0.025, alpha = 0.6) +
coord_sf(xlim = c(-8, 3), ylim = c(49.7, 61)) +
theme_minimal() +
labs(x = "Longitude", y = "Latitude", color = "Data source") +
scale_color_manual(values = c(
"GBIF_additional" = GBIF_colour,
"eDNA" = eDNA_colour,
"GBIF_base" = GBIF_base_colour
)) +
scale_shape_manual(values = c(
"GBIF_additional" = 17, # solid triangle
"eDNA" = 15, # solid square
"GBIF_base" = 19)) +
theme(legend.position = "none")
chthamalus_mapchthamalus_map_legend <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "grey") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
# Add all points except MarClim
geom_jitter(data = subset(chthamalus_all_sources, source != "MarClim"),
aes(x = lon, y = lat, color = source, shape = source),
size = 1.1, width = 0.025, height = 0.025, alpha = 0.6) +
coord_sf(xlim = c(-8, 3), ylim = c(49.7, 61)) +
theme_minimal() +
labs(x = "Longitude", y = "Latitude", color = "Data source", shape = "Data source") +
scale_color_manual(
values = c(
"GBIF_additional" = GBIF_colour,
"eDNA" = eDNA_colour,
"GBIF_base" = GBIF_base_colour
),
labels = c(
"GBIF_additional" = "GBIF additional",
"eDNA" = "eDNA",
"GBIF_base" = "GBIF baseline"
)
) +
scale_shape_manual(
values = c(
"GBIF_additional" = 17, # solid triangle
"eDNA" = 15, # solid square
"GBIF_base" = 19 # solid circle
),
labels = c(
"GBIF_additional" = "GBIF additional",
"eDNA" = "eDNA",
"GBIF_base" = "GBIF baseline"
)
) +
theme(legend.position = "bottom")
legend <- g_legend(chthamalus_map_legend)joint_map <- cowplot::plot_grid(
cowplot::plot_grid(
semibalanus_map,
chthamalus_map,
ncol = 2,
labels = c("a", "b"),
rel_heights = c(0.8, 1)
),
legend,
ncol = 1,
rel_heights = c(1, 0.06)
)
joint_map# Save the plot
ggsave(joint_map, filename = "Figures/chthamalus_semibalanus_map.png", width = 9, height = 7, dpi = 300)semibalanus_map_eval <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "grey") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
# Add all points except MarClim
geom_jitter(data = subset(semibalanus_all_sources, source == "MarClim"),
aes(x = lon, y = lat, color = source, shape = source),
size = 1.1, width = 0.025, height = 0.025, alpha = 0.6) +
coord_sf(xlim = c(-8, 3), ylim = c(49.7, 61)) +
theme_minimal() +
labs(x = "Longitude", y = "Latitude", color = "Data source") +
scale_color_manual(values = c(
"MarClim" = MarClim_colour
)) +
theme(legend.position = "none")
semibalanus_map_evalchthamalus_map_eval <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "grey") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
# Add all points except MarClim
geom_jitter(data = subset(chthamalus_all_sources, source == "MarClim"),
aes(x = lon, y = lat, color = source, shape = source),
size = 1.1, width = 0.025, height = 0.025, alpha = 0.6) +
coord_sf(xlim = c(-8, 3), ylim = c(49.7, 61)) +
theme_minimal() +
labs(x = "Longitude", y = "Latitude", color = "Data source") +
scale_color_manual(values = c(
"MarClim" = MarClim_colour
)) +
theme(legend.position = "none")
chthamalus_map_evalEnvironmental data
It’s time to get environmental variables of interest. We have a few options here - let’s explore them.
# set uk extent and colours
uk_extent <- extent(-8, 2, 49.7, 61)
cols <- colorRampPalette(c("blue", "lightblue", "yellow", "red"))(100)Temp, pH, coastline (Bio-ORCLE)
Bio-ORACLE is a comprehensive data set providing a wide range of marine environmental variables tailored for ecological and bio-geographical analysis. These variables include surface and benthic values for parameters such as temperature, salinity, nutrients, oxygen, pH, and more, derived from satellite and modeled oceanographic data.
We use the biooracler package here to obtain our environmental variables of interest from current and projected periods.
## ocean temp
ocean_temp_present = readRDS("Data/Environmental_data/BIO-ORACLE/thetao_baseline_2000_2019_depthsurf.rds")
plot(
ocean_temp_present,
col = cols
)ocean_temp_avg <- mean(ocean_temp_present)
ocean_temp_avgclass : SpatRaster
dimensions : 227, 201, 1 (nrow, ncol, nlyr)
resolution : 0.05, 0.05000001 (x, y)
extent : -8.05, 2, 49.7, 61.05 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (CRS84) (OGC:CRS84)
source(s) : memory
name : mean
min value : 9.076521
max value : 13.317162
ocean_temp_projected = readRDS("Data/Environmental_data/BIO-ORACLE/thetao_ssp585_2020_2100_depthsurf.rds")
plot(
ocean_temp_projected,
col = cols
)ocean_temp_projected_90 = ocean_temp_projected["thetao_mean_8"]
## air temp
air_temp_present = readRDS("Data/Environmental_data/BIO-ORACLE/tas_baseline_2000_2020_depthsurf.rds")
plot(
air_temp_present,
col = cols
)air_temp_projected = readRDS("Data/Environmental_data/BIO-ORACLE/tas_ssp585_2020_2100_depthsurf.rds")
plot(
air_temp_projected,
col = cols
)## ph
ph_present = readRDS("Data/Environmental_data/BIO-ORACLE/ph_baseline_2000_2018_depthsurf.rds")
plot(ph_present,
col = cols)ph_avg <- mean(ph_present)
ph_avgclass : SpatRaster
dimensions : 227, 201, 1 (nrow, ncol, nlyr)
resolution : 0.05, 0.05000001 (x, y)
extent : -8.05, 2, 49.7, 61.05 (xmin, xmax, ymin, ymax)
coord. ref. : lon/lat WGS 84 (CRS84) (OGC:CRS84)
source(s) : memory
name : mean
min value : 8.025923
max value : 8.116090
ph_projected = readRDS("Data/Environmental_data/BIO-ORACLE/ph_ssp585_2020_2100_depthsurf.rds")
plot(ph_projected,
col = cols)ph_projected_90 <- ph_projected["ph_mean_8"]
# terrain
terrain <- readRDS("Data/Environmental_data/BIO-ORACLE/terrain_characteristics.rds")
plot(terrain,
main = "Terrain")Rocky substrate (JNCC)
Let’s try the JNCC UKASH Mosaic of Localized Maps which does include intertidal data, but only for 12% of the UK.
# Path to the unzipped .gdb folder for UKASH Mosaic of Localised Maps (with intertidal zone)
gdb_path <- "Data/Environmental_data/JNCC/UKASH_MosaicOfLocalisedMaps_v2025.gdb"
# List available layers
st_layers(gdb_path)Driver: OpenFileGDB
Available layers:
layer_name geometry_type features fields crs_name
1 UKASH_MosaicLocalisedMaps_v2025_1 Multi Polygon 899864 20 WGS 84
# read in
ukash <- st_read(gdb_path, layer = "UKASH_MosaicLocalisedMaps_v2025_1")Reading layer `UKASH_MosaicLocalisedMaps_v2025_1' from data source
`C:\Users\dlsimons\Github_Repositories\intertidal-eDNA-SDMs-analyses\Data\Environmental_data\JNCC\UKASH_MosaicOfLocalisedMaps_v2025.gdb'
using driver `OpenFileGDB'
Simple feature collection with 899864 features and 20 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -14.40942 ymin: 48.11506 xmax: 2.89474 ymax: 60.86268
Geodetic CRS: WGS 84
# explore
print(ukash)Simple feature collection with 899864 features and 20 fields
Geometry type: MULTIPOLYGON
Dimension: XY
Bounding box: xmin: -14.40942 ymin: 48.11506 xmax: 2.89474 ymax: 60.86268
Geodetic CRS: WGS 84
First 10 features:
SNCB_UID EMODnetGUI OrigCode
1 GB001446 M.AtLB.Ro
2 GB001446 M.AtLB.Ro
3 GB001446 M.AtLB.Ro
4 GB001446 M.AtLB.Ro
5 GB001446 M.AtLB.Ro
6 GB001446 M.AtLB.Ro
7 GB001446 M.AtLB.Ro
8 GB001446 M.AtLB.Ro
9 GB001446 M.AtLB.Ro
10 GB001446 M.AtLB.Ro
OrigName
1 Atlantic lower bathyal rock and other hard substrata
2 Atlantic lower bathyal rock and other hard substrata
3 Atlantic lower bathyal rock and other hard substrata
4 Atlantic lower bathyal rock and other hard substrata
5 Atlantic lower bathyal rock and other hard substrata
6 Atlantic lower bathyal rock and other hard substrata
7 Atlantic lower bathyal rock and other hard substrata
8 Atlantic lower bathyal rock and other hard substrata
9 Atlantic lower bathyal rock and other hard substrata
10 Atlantic lower bathyal rock and other hard substrata
OrigClass EUNISCode
1 Marine Habitat Classification for Britain and Ireland v22.04 A6.12
2 Marine Habitat Classification for Britain and Ireland v22.04 A6.12
3 Marine Habitat Classification for Britain and Ireland v22.04 A6.12
4 Marine Habitat Classification for Britain and Ireland v22.04 A6.12
5 Marine Habitat Classification for Britain and Ireland v22.04 A6.12
6 Marine Habitat Classification for Britain and Ireland v22.04 A6.12
7 Marine Habitat Classification for Britain and Ireland v22.04 A6.12
8 Marine Habitat Classification for Britain and Ireland v22.04 A6.12
9 Marine Habitat Classification for Britain and Ireland v22.04 A6.12
10 Marine Habitat Classification for Britain and Ireland v22.04 A6.12
EUNISDetDate EUNISDetName EUNISTranR EUNISTranC MHCCode
1 <NA> <NA> # M.AtLB.Ro
2 <NA> <NA> # M.AtLB.Ro
3 <NA> <NA> # M.AtLB.Ro
4 <NA> <NA> # M.AtLB.Ro
5 <NA> <NA> # M.AtLB.Ro
6 <NA> <NA> # M.AtLB.Ro
7 <NA> <NA> # M.AtLB.Ro
8 <NA> <NA> # M.AtLB.Ro
9 <NA> <NA> # M.AtLB.Ro
10 <NA> <NA> # M.AtLB.Ro
MHCDetDate MHCDetName MHCTranR MHCTranC MESH_conf stp3_conf EUNISL3
1 2023-09-02 01:00:00 Cefas S NA NA A6.1
2 2023-09-02 01:00:00 Cefas S NA NA A6.1
3 2023-09-02 01:00:00 Cefas S NA NA A6.1
4 2023-09-02 01:00:00 Cefas S NA NA A6.1
5 2023-09-02 01:00:00 Cefas S NA NA A6.1
6 2023-09-02 01:00:00 Cefas S NA NA A6.1
7 2023-09-02 01:00:00 Cefas S NA NA A6.1
8 2023-09-02 01:00:00 Cefas S NA NA A6.1
9 2023-09-02 01:00:00 Cefas S NA NA A6.1
10 2023-09-02 01:00:00 Cefas S NA NA A6.1
SHAPE_Length SHAPE_Area SHAPE
1 0.001357848 1.107135e-07 MULTIPOLYGON (((-9.821604 4...
2 0.001357856 1.107145e-07 MULTIPOLYGON (((-9.821615 4...
3 0.001357865 1.107158e-07 MULTIPOLYGON (((-9.821622 4...
4 0.001358004 1.107346e-07 MULTIPOLYGON (((-9.821765 4...
5 0.004487330 6.921092e-07 MULTIPOLYGON (((-9.821802 4...
6 0.001357906 1.107213e-07 MULTIPOLYGON (((-9.821665 4...
7 0.004352628 7.196127e-07 MULTIPOLYGON (((-9.821565 4...
8 0.006658651 1.134971e-06 MULTIPOLYGON (((-9.820716 4...
9 0.005838411 1.162602e-06 MULTIPOLYGON (((-9.82067 48...
10 0.009652807 2.573850e-06 MULTIPOLYGON (((-9.8215 48....
# filter for rocky only
rock_ukash <- ukash %>%
filter(str_detect(str_to_lower(OrigName), "rock"))
# make a raster and match format to the other environmental variables
rock_ukash_rast <- raster::rasterize(rock_ukash, ocean_temp_present)
# plot
rock_ukash_plot <- ggplot(rock_ukash) +
geom_sf(aes(fill = OrigName)) +
scale_fill_viridis_d(option = "plasma", name = "Substrate Type") +
coord_sf(crs = 4326) +
theme_minimal() +
labs(
title = "UKASH Mosaic Localised Rock Habitat Map",
x = "Longitude", y = "Latitude"
) + theme(legend.position = "none")
rock_ukash_plotggsave(rock_ukash_plot, filename = "Figures/UKASH_Mosaic_Localised_Rock.jpeg")To get a better coverage, let’s import rocky habitat information from JNCC Marine Recorder Public UK snapshot.
Two Microsoft access databases (primary and secondary) are available at https://hub.jncc.gov.uk/assets/b9934e31-39b6-41f9-9364-d1e93db68307. The rocky habitat we are interested in are defined here - https://mhc.jncc.gov.uk/biotopes/jnccmncr00000003. Primary database contains locations, secondary contains biotypes.
# lists the drivers already installed for excel, access ect...
unique(odbc::odbcListDrivers()[[1]])[1] "SQL Server"
[2] "Microsoft Access Driver (*.mdb, *.accdb)"
[3] "Microsoft Excel Driver (*.xls, *.xlsx, *.xlsm, *.xlsb)"
[4] "Microsoft Access Text Driver (*.txt, *.csv)"
[5] "Microsoft Access dBASE Driver (*.dbf, *.ndx, *.mdx)"
# set database locations
primary_dbname <- "Data/Environmental_data/JNCC/SnapshotDatav52_PUBLIC_20220124.mdb"
secondary_dbname <- "Data/Environmental_data/JNCC//SnapshotDatav52_PUBLIC_20220124_secondary.mdb"
# Establish connections between R and Access
RODBC::odbcCloseAll() # clear any existing
primary_con <- RODBC::odbcConnectAccess2007(primary_dbname)
secondary_con <- RODBC::odbcConnectAccess2007(secondary_dbname)
RODBC::sqlTables(primary_con) TABLE_CAT
1 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
2 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
3 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
4 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
5 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
6 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
7 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
8 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
9 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
10 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
11 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
12 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
13 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
14 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
15 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
16 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
17 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
18 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
19 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
20 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
21 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
22 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
23 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
24 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
25 Data\\Environmental_data\\JNCC\\SnapshotDatav52_PUBLIC_20220124.mdb
TABLE_SCHEM TABLE_NAME TABLE_TYPE REMARKS
1 <NA> PhysicalData SYNONYM <NA>
2 <NA> SampleBiotopeAll SYNONYM <NA>
3 <NA> SampleBiotopeHistory SYNONYM <NA>
4 <NA> SampleSpeciesHistory SYNONYM <NA>
5 <NA> MSysAccessObjects SYSTEM TABLE <NA>
6 <NA> MSysAccessXML SYSTEM TABLE <NA>
7 <NA> MSysACEs SYSTEM TABLE <NA>
8 <NA> MSysNavPaneGroupCategories SYSTEM TABLE <NA>
9 <NA> MSysNavPaneGroups SYSTEM TABLE <NA>
10 <NA> MSysNavPaneGroupToObjects SYSTEM TABLE <NA>
11 <NA> MSysNavPaneObjectIDs SYSTEM TABLE <NA>
12 <NA> MSysObjects SYSTEM TABLE <NA>
13 <NA> MSysQueries SYSTEM TABLE <NA>
14 <NA> MSysRelationships SYSTEM TABLE <NA>
15 <NA> Location TABLE <NA>
16 <NA> Reference TABLE <NA>
17 <NA> Sample TABLE <NA>
18 <NA> SampleReplicate TABLE <NA>
19 <NA> SampleSpecies TABLE <NA>
20 <NA> SelectedObjects TABLE <NA>
21 <NA> Selections TABLE <NA>
22 <NA> Survey TABLE <NA>
23 <NA> SurveyEvent TABLE <NA>
24 <NA> SurveyReference TABLE <NA>
25 <NA> Version TABLE <NA>
RODBC::sqlTables(secondary_con) TABLE_CAT
1 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
2 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
3 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
4 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
5 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
6 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
7 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
8 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
9 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
10 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
11 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
12 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
13 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
14 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
15 Data\\Environmental_data\\JNCC\\\\SnapshotDatav52_PUBLIC_20220124_secondary.mdb
TABLE_SCHEM TABLE_NAME TABLE_TYPE REMARKS
1 <NA> MSysAccessObjects SYSTEM TABLE <NA>
2 <NA> MSysAccessXML SYSTEM TABLE <NA>
3 <NA> MSysACEs SYSTEM TABLE <NA>
4 <NA> MSysNavPaneGroupCategories SYSTEM TABLE <NA>
5 <NA> MSysNavPaneGroups SYSTEM TABLE <NA>
6 <NA> MSysNavPaneGroupToObjects SYSTEM TABLE <NA>
7 <NA> MSysNavPaneObjectIDs SYSTEM TABLE <NA>
8 <NA> MSysObjects SYSTEM TABLE <NA>
9 <NA> MSysQueries SYSTEM TABLE <NA>
10 <NA> MSysRelationships SYSTEM TABLE <NA>
11 <NA> PhysicalData TABLE <NA>
12 <NA> SampleBiotopeAll TABLE <NA>
13 <NA> SampleBiotopeHistory TABLE <NA>
14 <NA> SampleSpeciesHistory TABLE <NA>
15 <NA> Version TABLE <NA>
# Fetch the data from the SampleBiotopeAll table
locations <- RODBC::sqlFetch(primary_con, "Location", rownames = T)
SampleBiotopeAll <- RODBC::sqlFetch(secondary_con, "SampleBiotopeAll", rownames = T)
# Pull all the Biotope occurrence keys littoral habitats
Biotope_littoral <- SampleBiotopeAll %>%
dplyr::filter(grepl("LR", BiotopeCode)) # matches any LR entries (includes eulittoral)
# join
biotype_location <- left_join(Biotope_littoral, locations, by = "ObjectId")
biotype_location_reduced <- biotype_location %>% select(ObjectId, BiotopeCode, Lat, Long, LatWGS84, LongWGS84, Qualifier, CoordinateSystem) %>% na.omit()
# remove uncertain
biotype_location_reduced_certain <- biotype_location_reduced %>%
dplyr::filter(!grepl("Uncertain", Qualifier))
#visualise - looks like a some points are incorrect
rocky_map <- ggplot() +
geom_sf(data = wrld, fill = "lightyellow", color = "lightgray") +
# Add all points with color mapped to 'source'
geom_jitter(data = biotype_location_reduced,
aes(x = LongWGS84, y = LatWGS84, colour = Qualifier),
size = 0.8, width = 0.025, height = 0.025) +
coord_sf(xlim = c(-11, 3), ylim = c(49, 61)) +
theme_minimal() +
labs(x = "Longitude", y = "Latitude") #+
theme(legend.position = "none")List of 1
$ legend.position: chr "none"
- attr(*, "class")= chr [1:2] "theme" "gg"
- attr(*, "complete")= logi FALSE
- attr(*, "validate")= logi TRUE
rocky_mapggsave(rocky_map, filename = "Figures/JNCC_snapshot_littoral_map.jpeg")# Convert raster coastline (value=1 for land) to polygons
coast_poly <- as.polygons(terrain, dissolve = TRUE, values = TRUE)
# Keep only the polygons where coastline == 1
coast_poly <- coast_poly[coast_poly$coastline == 1, ]
# Buffer (in degrees — ~0.01° ≈ 1.1 km at equator)
buffer_dist <- 0.03 # adjust based on desired intertidal width
coast_poly_buffered <- terra::buffer(coast_poly, width = buffer_dist)
# Make a SpatVector from Lat/Long WGS84
biotype_vect <- vect(
biotype_location_reduced,
geom = c("LongWGS84", "LatWGS84"),
crs = "OGC:CRS84"
)
# Intersect points with buffered polygon
biotype_onshore_vect <- terra::intersect(biotype_vect, coast_poly_buffered)
# Convert points and polygon to sf
biotype_sf <- st_as_sf(biotype_onshore_vect)
coast_sf <- st_as_sf(coast_poly_buffered)
# Plot
jncc_snapshot_littorial_tidy_map <- ggplot() +
geom_sf(data = coast_sf, fill = "lightgray", color = "lightblue") +
geom_sf(data = biotype_sf, size = 1, color = "orange") +
coord_sf(xlim = c(-11, 3), ylim = c(49, 61)) + # adjust to your study area
theme_minimal() +
theme(legend.position = "none")
jncc_snapshot_littorial_tidy_mapggsave(jncc_snapshot_littorial_tidy_map, filename = "Figures/jncc_snapshot_littorial_tidy_map.jpeg", width = 5, height= 8)Let’s combined the two JNCC sources so we have the best cover for our environmental rocky variable.
# Ensure CRS matches
biotype_sf <- st_transform(biotype_sf, crs(rock_ukash_rast))
# Keep biotype_sf for later use (this is your vector dataset)
# Convert sf to SpatVector for rasterising
biotype_vect <- vect(biotype_sf)
# Create a template raster
rocky_template <- rast(rock_ukash_rast)
# Rasterise points
rocky_rast <- rasterize(biotype_vect, rocky_template, field = 1, background = NA)
# Combine with existing rock raster
combined_rock_rast <- cover(rocky_rast, rock_ukash_rast)
# add in missing bits (highlands coast, wales)
# top coast highlands
north_coast_ext <- ext(-5.5, -2.5, 58.2, 58.9)
r <- terrain
r_crop <- crop(r, north_coast_ext)
r_crop <- resample(r_crop, combined_rock_rast, method = "near")
combined_rock_rast <- cover(combined_rock_rast, r_crop)
# wales
wales_ext <- ext(-5.5, -4.4, 51.5, 52.2)
r_crop2 <- crop(r, wales_ext)
r_crop2 <- resample(r_crop2, combined_rock_rast, method = "near")
combined_rock_rast <- cover(combined_rock_rast, r_crop2)
# big scotland islands
isles_ext <- ext(-7.8, -5.1, 54.7, 58.7)
r_crop3 <- crop(r, isles_ext)
r_crop3 <- resample(r_crop3, combined_rock_rast, method = "near")
combined_rock_rast <- cover(combined_rock_rast, r_crop3)
# Plot
plot(combined_rock_rast, main = "Rocky substrate (binary)", col = "grey10")res(combined_rock_rast) #0.05 x 0.05[1] 0.05000000 0.05000001
# expand radius
# Calculate distance from rocky areas (in map units, e.g., meters)
dist_rast <- distance(combined_rock_rast)
|---------|---------|---------|---------|
=========================================
# Keep only cells within 5 km
expanded_rock <- dist_rast <= 8000
# Convert to binary (1 = rocky/within 1 km, NA = non-rocky)
expanded_rock[expanded_rock == 0] <- NA
expanded_rock[expanded_rock == 1] <- 1
plot(expanded_rock, main = "Expanded rocky substrate", col = "grey10")expanded_rock_coast_only <- mask(expanded_rock, terrain)
# Convert combined raster back to sf polygons if you want an sf version
combined_rock_sf <- st_as_sf(as.polygons(combined_rock_rast, dissolve = TRUE, values = TRUE))
# keep 1 as 1, set everything else to NA
rock01 <- ifel(combined_rock_rast == 1, 1, NA)
# polygonize only the 1s
pol1 <- as.polygons(rock01, dissolve = TRUE, na.rm = TRUE)
combined_rock_sf <- st_as_sf(pol1)
# optional cleanup
combined_rock_sf <- sf::st_make_valid(combined_rock_sf)
combined_rock_sf <- sf::st_cast(combined_rock_sf, "MULTIPOLYGON")
# make a raster and match format to the other environmental variables
rock_rast_combined <- raster::rasterize(combined_rock_sf, ocean_temp_present)Wave fetch (Burrows 2012)
2012 wave fetch data is downloaded from https://figshare.com/articles/dataset/Wave_fetch_GIS_layers_for_the_UK_and_Ireland_at_200m_scale/12029682?file=22107477.
Values represent the log base 10 of the summed distance to the nearest land (as the number of 200m grid cell units) across all 32 11.5° sectors. This is the data we’ll use for our models. Higher values mean more exposed, lower values mean less exposed.
# load and plot
fetch_2012 <- rast("Data/Environmental_data/log10_eu200m1a.tif")
plot(fetch_2012)# Reproject fetch to WGS84 (same CRS as ocean_temp)
fetch_wgs84 <- project(fetch_2012, ocean_temp_present)
# Resample to match ocean_temp grid (interpolates to same cells)
fetch_resampled <- resample(fetch_wgs84, ocean_temp_present, method = "bilinear")
plot(fetch_resampled)Create stacks
# stack
enviro_vars_current <- stack(c(
ocean_temp_avg,
ph_avg,
fetch_resampled
))
names(enviro_vars_current) <- c(
"ocean_temp",
"ph",
"wave_fetch"
)
names(enviro_vars_current)[1] "ocean_temp" "ph" "wave_fetch"
res(enviro_vars_current) # 0.05000000 x 0.05000001[1] 0.05000000 0.05000001
#plot
png(
"Figures/enviro_vars_current.png",
width = 2000,
height = 2000,
res = 300
)
plot(enviro_vars_current,
main = c("Sea Surface Temperature (SST)", "pH", "Wave fetch"))
dev.off()png
2
# future
enviro_vars_future <- stack(c(
ocean_temp_projected_90,
ph_projected_90,
fetch_resampled
))
names(enviro_vars_future) <- c(
"ocean_temp",
"ph",
"wave_fetch" # we need the future
)
#plot
png(
"Figures/enviro_vars_future.png",
width = 2000,
height = 2000,
res = 300
)
plot(enviro_vars_future,
main = c("Sea Surface Temperature (SST)", "pH", "Wave fetch"))
dev.off()png
2
# Combine both stacks into one for plot
enviro_both <- stack(enviro_vars_current, enviro_vars_future)
enviro_both <- dropLayer(enviro_both, nlayers(enviro_both))
# add in rocky layer
rock_rast_raster <- raster(rock_rast_combined)
names(rock_rast_raster) <- "rocky_substrate_binary"
enviro_both <- stack(enviro_both, rock_rast_raster)
#plot
# Save high-resolution plot
png(
"Figures/enviro_vars.png",
width = 2500,
height = 2000,
res = 300
)
plot(enviro_both,
main = c("Sea surface temperature (Current)",
"pH (Current)",
"Wave fetch",
"Sea surface temperature (Future)",
"pH (Future)",
"Rocky substrate")
)
dev.off()png
2
Fix unmatched occurrence points to environmental data
We can use the extract function from the raster package to check occurrences and environmental data overlap. Some points are not matched to our environmental data (we’ve got quite a few NAs). We need to find the closest point which is classed as coastline and use that instead.
Semibalanus corrections
SB MarClim
# snap the valid environmental cells from function created earlier
semibalanus_corrected_marclim <- snap_to_nearest_valid(
enviro_vars_current,
semibalanus_MarClim)
# check
envir_terra <- rast(enviro_vars_current)
presvals_semibalanus_corrected_marclim <- extract(envir_terra, semibalanus_corrected_marclim) # this looks better
# remove the ID variable
presvals_semibalanus_corrected_marclim <- presvals_semibalanus_corrected_marclim[,-1]
# check the new data
head(semibalanus_corrected_marclim) lon lat
1 -4.5592 50.8362
2 -2.3760 50.6330
3 -2.4600 50.5130
4 -2.4544 50.5414
5 -1.9450 50.6070
6 -2.4448 50.6062
# for the PA version
marclim_broad_sp_PA_semibalanus <- marclim_broad_sp_PA %>% select(-c("Chthamalus.montagui"))
semibalanus_corrected_marclim_PA <- snap_to_nearest_valid(
enviro_vars_current,
marclim_broad_sp_PA_semibalanus)
# snap the valid environmental cells from function created earlier
presvals_semibalanus_corrected_marclim_PA <- extract(envir_terra, semibalanus_corrected_marclim_PA) # this looks better
# remove the ID variable
presvals_semibalanus_corrected_marclim_PA <- presvals_semibalanus_corrected_marclim_PA[,-1]
# check the new data
head(semibalanus_corrected_marclim_PA) lon lat
1 -4.5592 50.8362
2 -2.3760 50.6330
3 -2.4600 50.5130
4 -2.4544 50.5414
5 -1.9450 50.6070
6 -2.1250 50.5750
#add PA back in
semibalanus_corrected_marclim_PA$Semibalanus.balanoides = marclim_broad_sp_PA_semibalanus$Semibalanus.balanoidesSB baseline
# snap the valid environmental cells from function created earlier
semibalanus_corrected_gbif_baseline <- snap_to_nearest_valid(
enviro_vars_current,
semibalanus_GBIF_baseline)
# check
presvals_semibalanus_corrected_gbif_baseline <- extract(envir_terra, semibalanus_corrected_gbif_baseline[, c("lon", "lat")]) # this looks better
# remove the ID variable
presvals_semibalanus_corrected_gbif_baseline <- presvals_semibalanus_corrected_gbif_baseline[,-1]
# check the new data
head(semibalanus_corrected_gbif_baseline) lon lat
1 -5.495591 50.21954
2 -5.025000 50.17500
3 -4.325000 53.12500
4 -5.061596 50.14580
5 -4.075000 50.27500
6 -5.475000 50.07500
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_semibalanus_corrected_gbif_baseline, 1, function(x) any(is.na(x)))
table(na_points) #no NAsna_points
FALSE
1611
SB GBIF + GBIF
# snap the valid environmental cells from function created earlier
semibalanus_corrected_gbif <- snap_to_nearest_valid(
enviro_vars_current,
semibalanus_GBIF_GBIF)
# check
presvals_semibalanus_corrected_gbif <- extract(envir_terra, semibalanus_corrected_gbif[, c("lon", "lat")]) # this looks better
# remove the ID variable
presvals_semibalanus_corrected_gbif <- presvals_semibalanus_corrected_gbif[,-1]
# check the new data
head(semibalanus_corrected_gbif) lon lat
1 -5.495591 50.21954
2 -5.025000 50.17500
3 -4.325000 53.12500
4 -5.061596 50.14580
5 -4.075000 50.27500
6 -5.475000 50.07500
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_semibalanus_corrected_gbif, 1, function(x) any(is.na(x)))
table(na_points) #no NAsna_points
FALSE
1649
SB GBIF + eDNA
# snap the valid environmental cells from function created earlier
semibalanus_corrected_GBIF_eDNA <- snap_to_nearest_valid(
enviro_vars_current,
semibalanus_GBIF_eDNA)
# check
presvals_semibalanus_corrected_GBIF_eDNA <- extract(envir_terra, semibalanus_corrected_GBIF_eDNA[, c("lon", "lat")]) # this looks better
# remove the ID variable
presvals_semibalanus_corrected_GBIF_eDNA <- presvals_semibalanus_corrected_GBIF_eDNA[,-1]
# check the new data
head(semibalanus_corrected_GBIF_eDNA) lon lat
1 -5.495591 50.21954
2 -5.025000 50.17500
3 -4.325000 53.12500
4 -5.061596 50.14580
5 -4.075000 50.27500
6 -5.475000 50.07500
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_semibalanus_corrected_GBIF_eDNA, 1, function(x) any(is.na(x)))
table(na_points) #no NAsna_points
FALSE
1631
Chthamalus corrections
CM MarClim
# snap the valid environmental cells from function created earlier
chthamalus_corrected_marclim <- snap_to_nearest_valid(
enviro_vars_current,
chthamalus_MarClim)
# check
presvals_chthamalus_corrected_marclim <- extract(envir_terra, chthamalus_corrected_marclim) # this looks better
# remove the ID variable
presvals_chthamalus_corrected_marclim <- presvals_chthamalus_corrected_marclim[,-1]
# check the new data
head(chthamalus_corrected_marclim) lon lat
1 -4.5592 50.8362
2 -2.3760 50.6330
3 -2.4600 50.5130
4 -2.4544 50.5414
5 -1.9450 50.6070
6 -2.1250 50.5750
# for the PA version
# call function we created eariler to correct points
marclim_broad_sp_PA_chthamalus <- marclim_broad_sp_PA %>% select(-c("Semibalanus.balanoides"))
# snap the valid environmental cells from function created earlier
chthamalus_corrected_marclim_PA <- snap_to_nearest_valid(
enviro_vars_current,
marclim_broad_sp_PA_chthamalus)
presvals_chthamalus_corrected_marclim_PA <- extract(envir_terra, chthamalus_corrected_marclim_PA) # this looks better
# remove the ID variable
presvals_chthamalus_corrected_marclim_PA <- presvals_chthamalus_corrected_marclim_PA[,-1]
# check the new data
head(chthamalus_corrected_marclim_PA) lon lat
1 -4.5592 50.8362
2 -2.3760 50.6330
3 -2.4600 50.5130
4 -2.4544 50.5414
5 -1.9450 50.6070
6 -2.1250 50.5750
#add PA back in
chthamalus_corrected_marclim_PA$Chthamalus.montagui = marclim_broad_sp_PA_chthamalus$Chthamalus.montaguiCM baseline
# snap the valid environmental cells from function created earlier
chthamalus_corrected_gbif_baseline <- snap_to_nearest_valid(
enviro_vars_current,
chthamalus_GBIF_baseline)
# check
presvals_chthamalus_corrected_gbif_baseline <- extract(envir_terra, chthamalus_corrected_gbif_baseline[, c("lon", "lat")]) # this looks better
# remove the ID variable
presvals_chthamalus_corrected_gbif_baseline <- presvals_chthamalus_corrected_gbif_baseline[,-1]
# check the new data
head(chthamalus_corrected_gbif_baseline) lon lat
1 -5.025000 50.17500
2 -5.105353 50.41336
3 -5.495591 50.21954
4 -4.559459 50.79599
5 -5.475000 50.07500
6 -5.043342 50.14615
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_chthamalus_corrected_gbif_baseline, 1, function(x) any(is.na(x)))
table(na_points) #no NAsna_points
FALSE
578
CM GBIF + GBIF
# snap the valid environmental cells from function created earlier
chthamalus_corrected_gbif <- snap_to_nearest_valid(
enviro_vars_current,
chthamalus_GBIF_GBIF)
# check
presvals_chthamalus_corrected_gbif <- extract(envir_terra, chthamalus_corrected_gbif[, c("lon", "lat")]) # this looks better
# remove the ID variable
presvals_chthamalus_corrected_gbif <- presvals_chthamalus_corrected_gbif[,-1]
# check the new data
head(chthamalus_corrected_gbif) lon lat
1 -5.025000 50.17500
2 -5.105353 50.41336
3 -5.495591 50.21954
4 -4.559459 50.79599
5 -5.475000 50.07500
6 -5.043342 50.14615
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_chthamalus_corrected_gbif, 1, function(x) any(is.na(x)))
table(na_points) #no Nasna_points
FALSE
605
CM GBIF + eDNA
# snap the valid environmental cells from function created earlier
chthamalus_corrected_GBIF_eDNA <- snap_to_nearest_valid(
enviro_vars_current,
chthamalus_GBIF_eDNA)
# check
presvals_chthamalus_corrected_GBIF_eDNA <- extract(envir_terra, chthamalus_corrected_GBIF_eDNA[, c("lon", "lat")]) # this looks better
# remove the ID variable
presvals_chthamalus_corrected_GBIF_eDNA <- presvals_chthamalus_corrected_GBIF_eDNA[,-1]
# check the new data
head(chthamalus_corrected_GBIF_eDNA) lon lat
1 -5.025000 50.17500
2 -5.105353 50.41336
3 -5.495591 50.21954
4 -4.559459 50.79599
5 -5.475000 50.07500
6 -5.043342 50.14615
# Count how many presence points have all variables NA (or any NA)
na_points <- apply(presvals_chthamalus_corrected_GBIF_eDNA, 1, function(x) any(is.na(x)))
table(na_points) #no NAsna_points
FALSE
589
biomod2 workflow
We are going to build our models and forecasts using the biomod2 package. We will build four models, visualize their outputs and forecast changes into the far future (2100).
Data formatting
For our models, the GBIF base + GBIF addition and GBIF base + eDNA addition data will be used to build the individual models, being split for example 10 times into 70/30 percentages to cross-validate them (which is defined in the next section).
The MarClim data will be used to evaluate all models (individual or ensemble) - this is defined when formatting the data in this section.
Data set 1 (Semibalanus - GBIF + GBIF)
# pa as df and tidy
# marclim
semibalanus_corrected_marclim_PA_df <- as.data.frame(semibalanus_corrected_marclim_PA)
# occurences
semibalanus_corrected_gbif_df <- as.data.frame(semibalanus_corrected_gbif)
semibalanus_corrected_gbif_df$Semibalanus.balanoides <- 1 #add p for gbif
# Extract env values
env_vals <- extract(envir_terra, semibalanus_corrected_gbif)
# Bind back to coordinates
pts_with_vals <- cbind(semibalanus_corrected_gbif_df, env_vals)
# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]Now let’s put all our data elements into the BIOMOD_FormatingData function.
set.seed(seed.val.set)
myBiomodData_semibalanus_GBIF <- BIOMOD_FormatingData(
resp.var = rep(1, nrow(pts_clean)),
# presence-only
expl.var = enviro_vars_current,
resp.xy = pts_clean,
resp.name = 'Semibalanus.balanoides',
PA.nb.rep = 1,
# Number of pseudo-absence sets (10 recom by biomd)
PA.nb.absences = nrow(pts_clean),
# Number of pseudo-absences (same number of presences)
PA.strategy = 'random',
eval.resp.var = semibalanus_corrected_marclim_PA_df$Semibalanus.balanoides,
# Evaluation responses (0/1)
eval.resp.xy = semibalanus_corrected_marclim_PA_df[, c("lon", "lat")],
# Evaluation locations,
eval.expl.var = presvals_semibalanus_corrected_marclim_PA,
#filter.raster = TRUE #autofilter,
na.rm = FALSE,
seed.val = seed.val.set
)
summary(myBiomodData_semibalanus_GBIF)
# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_semibalanus_GBIF.png",
width = 2000, height = 1500, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_GBIF)
dev.off()Data set 2 (Semibalanus - GBIF + eDNA)
# occurences
semibalanus_corrected_GBIF_eDNA_df <- as.data.frame(semibalanus_corrected_GBIF_eDNA)
semibalanus_corrected_GBIF_eDNA_df$Semibalanus.balanoides <- 1 #add p for gbif
# Extract env values
env_vals <- extract(envir_terra, semibalanus_corrected_GBIF_eDNA)
# Bind back to coordinates
pts_with_vals <- cbind(semibalanus_corrected_GBIF_eDNA_df, env_vals)
# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]Now let’s put all our data elements into the BIOMOD_FormatingData function.
set.seed(seed.val.set)
myBiomodData_semibalanus_GBIF_eDNA <- BIOMOD_FormatingData(
resp.var = rep(1, nrow(pts_clean)) ,
# presence-only
expl.var = enviro_vars_current,
resp.xy = pts_clean,
resp.name = 'Semibalanus.balanoides',
PA.nb.rep = 1,
# Number of pseudo-absence sets (10 recom by biomd)
PA.nb.absences = nrow(pts_clean),
# Number of pseudo-absences (same number of presences)
PA.strategy = 'random',
eval.resp.var = semibalanus_corrected_marclim_PA_df$Semibalanus.balanoides,
# Evaluation responses (0/1)
eval.resp.xy = semibalanus_corrected_marclim_PA_df[, c("lon", "lat")],
# Evaluation locations,
eval.expl.var = presvals_semibalanus_corrected_marclim_PA,
#filter.raster = TRUE #autofilter
na.rm = FALSE,
seed.val = seed.val.set
)
summary(myBiomodData_semibalanus_GBIF_eDNA)
# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_semibalanus_GBIF_eDNA.png",
width = 2000, height = 1500, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_GBIF_eDNA)
dev.off()Data set 3 (SB - baseline)
# occurences
semibalanus_corrected_gbif_baseline_df <- as.data.frame(semibalanus_corrected_gbif_baseline)
semibalanus_corrected_gbif_baseline_df$Semibalanus.balanoides <- 1 #add p for gbif
# Extract env values
env_vals <- extract(envir_terra, semibalanus_corrected_gbif_baseline)
# Bind back to coordinates
pts_with_vals <- cbind(semibalanus_corrected_gbif_baseline_df, env_vals)
# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]set.seed(seed.val.set)
myBiomodData_semibalanus_baseline <- BIOMOD_FormatingData(
resp.var = rep(1, nrow(pts_clean)) ,
# presence-only
expl.var = enviro_vars_current,
resp.xy = pts_clean,
resp.name = 'Semibalanus.balanoides',
PA.nb.rep = 1,
# Number of pseudo-absence sets (10 recom by biomd)
PA.nb.absences = nrow(pts_clean),
# Number of pseudo-absences (same number of presences)
PA.strategy = 'random',
eval.resp.var = semibalanus_corrected_marclim_PA_df$Semibalanus.balanoides,
# Evaluation responses (0/1)
eval.resp.xy = semibalanus_corrected_marclim_PA_df[, c("lon", "lat")],
# Evaluation locations,
eval.expl.var = presvals_semibalanus_corrected_marclim_PA,
#filter.raster = TRUE #autofilter
na.rm = FALSE,
seed.val = seed.val.set
)
summary(myBiomodData_semibalanus_baseline)
# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_semibalanus_baseline.png",
width = 2000, height = 1500, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_baseline)
dev.off()Data set 4 (Chthamalus - GBIF + GBIF)
# pa as df and tidy
# marclim
chthamalus_corrected_marclim_PA_df <- as.data.frame(chthamalus_corrected_marclim_PA)
# occurences
chthamalus_corrected_gbif_df <- as.data.frame(chthamalus_corrected_gbif)
chthamalus_corrected_gbif_df$Chthamalus.montagui <- 1 #add p for gbif
# Extract env values
env_vals <- extract(envir_terra, chthamalus_corrected_gbif)
# Bind back to coordinates
pts_with_vals <- cbind(chthamalus_corrected_gbif_df, env_vals)
# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]set.seed(seed.val.set)
myBiomodData_chthamalus_GBIF <- BIOMOD_FormatingData(
resp.var = rep(1, nrow(pts_clean)) ,
# presence-only
expl.var = enviro_vars_current,
resp.xy = pts_clean,
resp.name = 'Chthamalus.montagui',
PA.nb.rep = 1,
# Number of pseudo-absence sets (10 recom by biomd)
PA.nb.absences = nrow(pts_clean),
# Number of pseudo-absences (same number of presences)
PA.strategy = 'random',
eval.resp.var = chthamalus_corrected_marclim_PA_df$Chthamalus.montagui,
# Evaluation responses (0/1)
eval.resp.xy = chthamalus_corrected_marclim_PA_df[, c("lon", "lat")],
# Evaluation locations,
eval.expl.var = presvals_chthamalus_corrected_marclim_PA,
#filter.raster = TRUE #autofilter
na.rm = FALSE,
seed.val = seed.val.set
)
myBiomodData_chthamalus_GBIF
# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_chthamalus_GBIF.png",
width = 2000, height = 1500, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_GBIF)
dev.off()Data set 5 (Chthamalus - GBIF + eDNA)
# occurences
chthamalus_corrected_GBIF_eDNA_df <- as.data.frame(chthamalus_corrected_GBIF_eDNA)
chthamalus_corrected_GBIF_eDNA_df$Chthamalus.montagui<- 1 #add p for gbif
# Extract env values
env_vals <- extract(envir_terra, chthamalus_corrected_GBIF_eDNA)
# Bind back to coordinates
pts_with_vals <- cbind(chthamalus_corrected_GBIF_eDNA_df, env_vals)
# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]set.seed(seed.val.set)
myBiomodData_chthamalus_GBIF_eDNA <- BIOMOD_FormatingData(
resp.var = rep(1, nrow(pts_clean)) ,
# presence-only
expl.var = enviro_vars_current,
resp.xy = pts_clean,
resp.name = 'Chthamalus.montagui',
PA.nb.rep = 1,
# Number of pseudo-absence sets (10 recom by biomd)
PA.nb.absences = nrow(pts_clean),
# Number of pseudo-absences (same number of presences)
PA.strategy = 'random',
eval.resp.var = chthamalus_corrected_marclim_PA_df$Chthamalus.montagui,
# Evaluation responses (0/1)
eval.resp.xy = chthamalus_corrected_marclim_PA_df[, c("lon", "lat")],
# Evaluation locations,
eval.expl.var = presvals_chthamalus_corrected_marclim_PA,
#filter.raster = TRUE #autofilter
na.rm = FALSE,
seed.val = seed.val.set
)
myBiomodData_chthamalus_GBIF_eDNA
# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_chthamalus_GBIF_eDNA.png",
width = 2000, height = 1500, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_GBIF_eDNA)
dev.off()Data set 6 (CM baseline)
# occurences
chthamalus_corrected_gbif_baseline_df <- as.data.frame(chthamalus_corrected_gbif_baseline)
chthamalus_corrected_gbif_baseline_df$Chthamalus.montagui <- 1 #add p for gbif
# Extract env values
env_vals <- extract(envir_terra, chthamalus_corrected_gbif_baseline)
# Bind back to coordinates
pts_with_vals <- cbind(chthamalus_corrected_gbif_baseline_df, env_vals)
# Remove NAs
pts_clean <- pts_with_vals[complete.cases(pts_with_vals), c("lon", "lat")]set.seed(seed.val.set)
myBiomodData_chthamalus_baseline <- BIOMOD_FormatingData(
resp.var = rep(1, nrow(pts_clean)) ,
# presence-only
expl.var = enviro_vars_current,
resp.xy = pts_clean,
resp.name = 'Chthamalus.montagui',
PA.nb.rep = 1,
# Number of pseudo-absence sets (10 recom by biomd)
PA.nb.absences = nrow(pts_clean),
# Number of pseudo-absences (same number of presences)
PA.strategy = 'random',
eval.resp.var = chthamalus_corrected_marclim_PA_df$Chthamalus.montagui,
# Evaluation responses (0/1)
eval.resp.xy = chthamalus_corrected_marclim_PA_df[, c("lon", "lat")],
# Evaluation locations,
eval.expl.var = presvals_chthamalus_corrected_marclim_PA,
#filter.raster = TRUE #autofilter
na.rm = FALSE,
seed.val = seed.val.set
)
myBiomodData_chthamalus_baseline
# Save BIOMOD_FormatingData plot to PNG
png("Figures/myBiomodData_chthamalus_baseline.png",
width = 2000, height = 1500, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_baseline)
dev.off()We now have six biomod objects ready for modelling.
#SB
summary(myBiomodData_semibalanus_baseline) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial NA <NA> 1611 0 0 1611
2 evaluation NA <NA> 218 18 0 0
3 calibration NA PA1 1611 0 1611 NA
summary(myBiomodData_semibalanus_GBIF) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial NA <NA> 1649 0 0 1649
2 evaluation NA <NA> 218 18 0 0
3 calibration NA PA1 1649 0 1649 NA
summary(myBiomodData_semibalanus_GBIF_eDNA) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial NA <NA> 1631 0 0 1631
2 evaluation NA <NA> 218 18 0 0
3 calibration NA PA1 1631 0 1631 NA
#CM
summary(myBiomodData_chthamalus_baseline) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial NA <NA> 578 0 0 578
2 evaluation NA <NA> 145 91 0 0
3 calibration NA PA1 578 0 578 NA
summary(myBiomodData_chthamalus_GBIF) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial NA <NA> 605 0 0 605
2 evaluation NA <NA> 145 91 0 0
3 calibration NA PA1 605 0 605 NA
summary(myBiomodData_chthamalus_GBIF_eDNA) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial NA <NA> 589 0 0 589
2 evaluation NA <NA> 145 91 0 0
3 calibration NA PA1 589 0 589 NA
Cross validation
Cross validation 1 (Semibalanus - GBIF + GBIF)
set.seed(seed.val.set)
cv.sv <- bm_CrossValidation(bm.format = myBiomodData_semibalanus_GBIF,
strategy = 'random',
nb.rep = 2,
perc = 0.7)
Checking Cross-Validation arguments...
> Random cross-validation selection
summary(myBiomodData_semibalanus_GBIF, calib.lines = cv.sv) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial <NA> <NA> 1649 0 0 1649
2 evaluation <NA> <NA> 218 18 0 0
3 calibration RUN1 PA1 1154 0 1154 NA
4 validation RUN1 PA1 495 0 495 NA
5 calibration RUN2 PA1 1154 0 1154 NA
6 validation RUN2 PA1 495 0 495 NA
# Save plot to PNG
png("Figures/myBiomodData_validation_semibalanus_visual.png",
width = 2000, height = 3000, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_GBIF, calib.lines = cv.sv)$data.vect
class : SpatVector
geometry : points
dimensions : 8481, 2 (geometries, attributes)
extent : -8.025, 1.975, 49.725, 60.875 (xmin, xmax, ymin, ymax)
coord. ref. :
names : resp dataset
type : <num> <chr>
values : 10 Initial dataset
10 Initial dataset
10 Initial dataset
$data.label
9 10
"**Presences**" "Presences (calibration)"
11 12
"Presences (validation)" "Presences (evaluation)"
19 20
"**True Absences**" "True Absences (calibration)"
21 22
"True Absences (validation)" "True Absences (evaluation)"
29 30
"**Pseudo-Absences**" "Pseudo-Absences (calibration)"
31 1
"Pseudo-Absences (validation)" "Background"
$data.plot
dev.off()png
2
Cross validation 2 (Semibalanus - GBIF + eDNA)
set.seed(seed.val.set)
cv.sc <- bm_CrossValidation(bm.format = myBiomodData_semibalanus_GBIF_eDNA,
strategy = 'random',
nb.rep = 2,
perc = 0.7)
Checking Cross-Validation arguments...
> Random cross-validation selection
summary(myBiomodData_semibalanus_GBIF_eDNA, calib.lines = cv.sc) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial <NA> <NA> 1631 0 0 1631
2 evaluation <NA> <NA> 218 18 0 0
3 calibration RUN1 PA1 1142 0 1142 NA
4 validation RUN1 PA1 489 0 489 NA
5 calibration RUN2 PA1 1142 0 1142 NA
6 validation RUN2 PA1 489 0 489 NA
# Save plot to PNG
png("Figures/myBiomodData_validation_semibalanus_combined.png",
width = 2000, height = 3000, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_GBIF_eDNA, calib.lines = cv.sc)$data.vect
class : SpatVector
geometry : points
dimensions : 8391, 2 (geometries, attributes)
extent : -8.025, 1.975, 49.725, 60.875 (xmin, xmax, ymin, ymax)
coord. ref. :
names : resp dataset
type : <num> <chr>
values : 10 Initial dataset
10 Initial dataset
10 Initial dataset
$data.label
9 10
"**Presences**" "Presences (calibration)"
11 12
"Presences (validation)" "Presences (evaluation)"
19 20
"**True Absences**" "True Absences (calibration)"
21 22
"True Absences (validation)" "True Absences (evaluation)"
29 30
"**Pseudo-Absences**" "Pseudo-Absences (calibration)"
31 1
"Pseudo-Absences (validation)" "Background"
$data.plot
dev.off()png
2
Cross validation 3 (SB - baseline)
set.seed(seed.val.set)
cv.sbase <- bm_CrossValidation(bm.format = myBiomodData_semibalanus_baseline,
strategy = 'random',
nb.rep = 2,
perc = 0.7)
Checking Cross-Validation arguments...
> Random cross-validation selection
summary(myBiomodData_semibalanus_baseline, calib.lines = cv.sbase) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial <NA> <NA> 1611 0 0 1611
2 evaluation <NA> <NA> 218 18 0 0
3 calibration RUN1 PA1 1128 0 1128 NA
4 validation RUN1 PA1 483 0 483 NA
5 calibration RUN2 PA1 1128 0 1128 NA
6 validation RUN2 PA1 483 0 483 NA
# Save plot to PNG
png("Figures/myBiomodData_validation_semibalanus_baseline.png",
width = 2000, height = 3000, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_semibalanus_baseline, calib.lines = cv.sbase)$data.vect
class : SpatVector
geometry : points
dimensions : 8291, 2 (geometries, attributes)
extent : -8.025, 1.975, 49.725, 60.875 (xmin, xmax, ymin, ymax)
coord. ref. :
names : resp dataset
type : <num> <chr>
values : 10 Initial dataset
10 Initial dataset
10 Initial dataset
$data.label
9 10
"**Presences**" "Presences (calibration)"
11 12
"Presences (validation)" "Presences (evaluation)"
19 20
"**True Absences**" "True Absences (calibration)"
21 22
"True Absences (validation)" "True Absences (evaluation)"
29 30
"**Pseudo-Absences**" "Pseudo-Absences (calibration)"
31 1
"Pseudo-Absences (validation)" "Background"
$data.plot
dev.off()png
2
Cross validation 4 (Chthamalus - GBIF + GBIF)
set.seed(seed.val.set)
cv.cv <- bm_CrossValidation(bm.format = myBiomodData_chthamalus_GBIF,
strategy = 'random',
nb.rep = 2,
perc = 0.7)
Checking Cross-Validation arguments...
> Random cross-validation selection
summary(myBiomodData_chthamalus_GBIF, calib.lines = cv.cv) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial <NA> <NA> 605 0 0 605
2 evaluation <NA> <NA> 145 91 0 0
3 calibration RUN1 PA1 424 0 424 NA
4 validation RUN1 PA1 181 0 181 NA
5 calibration RUN2 PA1 424 0 424 NA
6 validation RUN2 PA1 181 0 181 NA
# Save plot to PNG
png("Figures/myBiomodData_validation_chthamalus_visual.png",
width = 2000, height = 3000, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_GBIF, calib.lines = cv.cv)$data.vect
class : SpatVector
geometry : points
dimensions : 3261, 2 (geometries, attributes)
extent : -8.025, 1.975, 49.725, 60.825 (xmin, xmax, ymin, ymax)
coord. ref. :
names : resp dataset
type : <num> <chr>
values : 10 Initial dataset
10 Initial dataset
10 Initial dataset
$data.label
9 10
"**Presences**" "Presences (calibration)"
11 12
"Presences (validation)" "Presences (evaluation)"
19 20
"**True Absences**" "True Absences (calibration)"
21 22
"True Absences (validation)" "True Absences (evaluation)"
29 30
"**Pseudo-Absences**" "Pseudo-Absences (calibration)"
31 1
"Pseudo-Absences (validation)" "Background"
$data.plot
dev.off()png
2
Cross validation 5 (Chthamalus - GBIF + eDNA)
set.seed(seed.val.set)
cv.cc <- bm_CrossValidation(bm.format = myBiomodData_chthamalus_GBIF_eDNA,
strategy = 'random',
nb.rep = 2,
perc = 0.7)
Checking Cross-Validation arguments...
> Random cross-validation selection
summary(myBiomodData_chthamalus_GBIF_eDNA, calib.lines = cv.cc) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial <NA> <NA> 589 0 0 589
2 evaluation <NA> <NA> 145 91 0 0
3 calibration RUN1 PA1 412 0 412 NA
4 validation RUN1 PA1 177 0 177 NA
5 calibration RUN2 PA1 412 0 412 NA
6 validation RUN2 PA1 177 0 177 NA
# Save plot to PNG
png("Figures/myBiomodData_validation_chthamalus_combined.png",
width = 2000, height = 3000, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_GBIF_eDNA, calib.lines = cv.cc)$data.vect
class : SpatVector
geometry : points
dimensions : 3181, 2 (geometries, attributes)
extent : -8.025, 1.975, 49.725, 60.825 (xmin, xmax, ymin, ymax)
coord. ref. :
names : resp dataset
type : <num> <chr>
values : 10 Initial dataset
10 Initial dataset
10 Initial dataset
$data.label
9 10
"**Presences**" "Presences (calibration)"
11 12
"Presences (validation)" "Presences (evaluation)"
19 20
"**True Absences**" "True Absences (calibration)"
21 22
"True Absences (validation)" "True Absences (evaluation)"
29 30
"**Pseudo-Absences**" "Pseudo-Absences (calibration)"
31 1
"Pseudo-Absences (validation)" "Background"
$data.plot
dev.off()png
2
Cross validation 6 (CM baseline)
set.seed(seed.val.set)
cv.cbase <- bm_CrossValidation(bm.format = myBiomodData_chthamalus_baseline,
strategy = 'random',
nb.rep = 2,
perc = 0.7)
Checking Cross-Validation arguments...
> Random cross-validation selection
summary(myBiomodData_chthamalus_baseline, calib.lines = cv.cbase) dataset run PA Presences True_Absences Pseudo_Absences Undefined
1 initial <NA> <NA> 578 0 0 578
2 evaluation <NA> <NA> 145 91 0 0
3 calibration RUN1 PA1 405 0 405 NA
4 validation RUN1 PA1 173 0 173 NA
5 calibration RUN2 PA1 405 0 405 NA
6 validation RUN2 PA1 173 0 173 NA
# Save plot to PNG
png("Figures/myBiomodData_validation_chthamalus_baseline.png",
width = 2000, height = 3000, res = 300) # size in pixels, resolution in DPI
plot(myBiomodData_chthamalus_baseline, calib.lines = cv.cbase)$data.vect
class : SpatVector
geometry : points
dimensions : 3126, 2 (geometries, attributes)
extent : -8.025, 1.975, 49.725, 60.825 (xmin, xmax, ymin, ymax)
coord. ref. :
names : resp dataset
type : <num> <chr>
values : 10 Initial dataset
10 Initial dataset
10 Initial dataset
$data.label
9 10
"**Presences**" "Presences (calibration)"
11 12
"Presences (validation)" "Presences (evaluation)"
19 20
"**True Absences**" "True Absences (calibration)"
21 22
"True Absences (validation)" "True Absences (evaluation)"
29 30
"**Pseudo-Absences**" "Pseudo-Absences (calibration)"
31 1
"Pseudo-Absences (validation)" "Background"
$data.plot
dev.off()png
2
Run models
Time to run our models!
TSS_thres <- 0.35
ROC_thres <- 0.65Model 1 (Semibalanus - GBIF + GBIF)
set.seed(seed.val.set)
# Model single models
myBiomodModelOut_semibalanus_GBIF_GBIF <- BIOMOD_Modeling(
bm.format = myBiomodData_semibalanus_GBIF,
models = c('GAM','GLM','RF','MAXENT'),
modeling.id = "Semibalanus_GBIF_GBIF",
CV.strategy = 'random', #cross-validation selection strategy
CV.nb.rep = 2, #number of sets of cross-validation points
CV.perc = 0.7, #percentage kept for calibration
OPT.strategy = 'default',
var.import = 2,
metric.eval = c('TSS', 'ROC'),
seed.val = seed.val.set
)
# check which options can be altered
get_options(myBiomodModelOut_semibalanus_GBIF_GBIF)
# see output
myBiomodModelOut_semibalanus_GBIF_GBIF# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Individual-GBIF-GBIF/"
figure_path <- "Figures/Semi.bal/Individual-GBIF-GBIF/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_semibalanus_GBIF_GBIF)
var_imp <- get_variables_importance(myBiomodModelOut_semibalanus_GBIF_GBIF)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, dataset = "calibration")algorithm_cal_plot_SB_CO <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_SB_CO, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, dataset = "evaluation")algorithm_eval_plot_SB_CO <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_SB_CO, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, group.by = c('algo', 'algo'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)
# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, group.by = c('algo', 'run'))plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, group.by = c('expl.var', 'algo', 'algo'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF, group.by = c('expl.var', 'algo', 'run'))plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)
# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_semibalanus_GBIF_GBIF)[c(1:3, 12:14)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF,
models.chosen = get_built_models(myBiomodModelOut_semibalanus_GBIF_GBIF)[3],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)set.seed(seed.val.set)
# Model ensemble models
myBiomodEM_semibalanus_GBIF_GBIF <- BIOMOD_EnsembleModeling(
bm.mod = myBiomodModelOut_semibalanus_GBIF_GBIF,
models.chosen = 'all',
em.by = 'all',
em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
metric.select = c('TSS', 'ROC'),
metric.select.thresh = c(TSS_thres, ROC_thres),
metric.eval = c('TSS', 'ROC'),
var.import = 2,
EMci.alpha = 0.05,
EMwmean.decay = 'proportional',
seed.val = seed.val.set
)
myBiomodEM_semibalanus_GBIF_GBIF# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Ensemble-GBIF-GBIF/"
figure_path <- "Figures/Semi.bal/Ensemble-GBIF-GBIF/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_semibalanus_GBIF_GBIF)
var_imp <- get_variables_importance(myBiomodEM_semibalanus_GBIF_GBIF)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, group.by = 'full.name')ensemble_cal_plot_SB_CO <- eval_calib$plot + theme_classic() + labs(colour = "Type") + theme(legend.position = "bottom")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(ensemble_cal_plot_SB_CO, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, dataset = "evaluation")ensemble_eval_plot_SB_CO <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_SB_CO, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, group.by = c('full.name', 'full.name'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Model",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 5, height = 3)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, c('expl.var', 'full.name', 'full.name'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_GBIF, group.by = c('expl.var', 'algo', 'merged.by.run'))plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Response curves
models_to_plot <- get_built_models(myBiomodEM_semibalanus_GBIF_GBIF)[c(1, 6, 7)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_GBIF,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_GBIF,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_GBIF,
models.chosen = get_built_models(myBiomodEM_semibalanus_GBIF_GBIF)[3],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)Model 2 (Semibalanus - GBIF + eDNA)
set.seed(seed.val.set)
# Model single models
myBiomodModelOut_semibalanus_GBIF_eDNA <- BIOMOD_Modeling(
bm.format = myBiomodData_semibalanus_GBIF_eDNA,
models = c('GAM','GLM','RF','MAXENT'),
modeling.id = "Semibalanus_GBIF_eDNA",
CV.strategy = 'random',
CV.nb.rep = 2,
CV.perc = 0.7,
OPT.strategy = 'default',
var.import = 2,
metric.eval = c('TSS', 'ROC'),
seed.val = seed.val.set
)
get_options(myBiomodModelOut_semibalanus_GBIF_eDNA)
myBiomodModelOut_semibalanus_GBIF_eDNA# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Individual-GBIF-eDNA/"
figure_path <- "Figures/Semi.bal/Individual-GBIF-eDNA/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_semibalanus_GBIF_eDNA)
var_imp <- get_variables_importance(myBiomodModelOut_semibalanus_GBIF_eDNA)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, dataset = "calibration")algorithm_cal_plot_SB_CE <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_SB_CE, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, dataset = "evaluation")algorithm_eval_plot_SB_CE <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_SB_CE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, group.by = c('algo', 'algo'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)
# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, group.by = c('algo', 'run'))plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'algo'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'run'))plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)
# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_semibalanus_GBIF_eDNA)[c(1:3, 12:14)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA,
models.chosen = get_built_models(myBiomodModelOut_semibalanus_GBIF_eDNA)[3],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)set.seed(seed.val.set)
# Model ensemble models
myBiomodEM_semibalanus_GBIF_eDNA <- BIOMOD_EnsembleModeling(
bm.mod = myBiomodModelOut_semibalanus_GBIF_eDNA,
models.chosen = 'all',
em.by = 'all',
em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
metric.select = c('TSS', 'ROC'),
metric.select.thresh = c(TSS_thres, ROC_thres),
metric.eval = c('TSS', 'ROC'),
var.import = 2,
EMci.alpha = 0.05,
EMwmean.decay = 'proportional',
seed.val = seed.val.set
)
myBiomodEM_semibalanus_GBIF_eDNA# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Ensemble-GBIF-eDNA/"
figure_path <- "Figures/Semi.bal/Ensemble-GBIF-eDNA/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_semibalanus_GBIF_eDNA)
var_imp <- get_variables_importance(myBiomodEM_semibalanus_GBIF_eDNA)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, group.by = 'full.name')plot <- eval_calib$plot + theme_classic() + labs(colour = "Type")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, dataset = "evaluation")ensemble_eval_plot_SB_CE <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_SB_CE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, group.by = c('full.name', 'full.name'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Type",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 10, height = 5)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, c('expl.var', 'full.name', 'full.name'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'merged.by.run'))plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Response curves
models_to_plot <- models_to_plot <- get_built_models(myBiomodEM_semibalanus_GBIF_eDNA)[c(1, 6, 7)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_eDNA,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_eDNA,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_eDNA,
models.chosen = get_built_models(myBiomodEM_semibalanus_GBIF_eDNA)[7],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)Model 3 (SB baseline)
set.seed(seed.val.set)
# Model single models
myBiomodModelOut_semibalanus_baseline <- BIOMOD_Modeling(
bm.format = myBiomodData_semibalanus_baseline,
models = c('GAM','GLM','RF','MAXENT'),
modeling.id = "Semibalanus_baseline",
CV.strategy = 'random',
CV.nb.rep = 2,
CV.perc = 0.7,
OPT.strategy = 'default',
var.import = 2,
metric.eval = c('TSS', 'ROC'),
seed.val = seed.val.set
)
get_options(myBiomodModelOut_semibalanus_baseline)
myBiomodModelOut_semibalanus_baseline# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Individual-baseline/"
figure_path <- "Figures/Semi.bal/Individual-baseline/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_semibalanus_baseline)
var_imp <- get_variables_importance(myBiomodModelOut_semibalanus_baseline)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_baseline, dataset = "calibration")algorithm_cal_plot_SB_BASE <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_SB_BASE, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_semibalanus_baseline, dataset = "evaluation")algorithm_eval_plot_SB_BASE <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_SB_BASE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_baseline, group.by = c('algo', 'algo'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)
# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_semibalanus_baseline, group.by = c('algo', 'run'))plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_baseline, group.by = c('expl.var', 'algo', 'algo'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_semibalanus_baseline, group.by = c('expl.var', 'algo', 'run'))plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)
# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_semibalanus_baseline)[c(1:3, 12:14)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_baseline,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_baseline,
models.chosen = get_built_models(myBiomodModelOut_semibalanus_baseline)[3],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)set.seed(seed.val.set)
# Model ensemble models
myBiomodEM_semibalanus_baseline <- BIOMOD_EnsembleModeling(
bm.mod = myBiomodModelOut_semibalanus_baseline,
models.chosen = 'all',
em.by = 'all',
em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
metric.select = c('TSS', 'ROC'),
metric.select.thresh = c(TSS_thres, ROC_thres),
metric.eval = c('TSS', 'ROC'),
var.import = 2,
EMci.alpha = 0.05,
EMwmean.decay = 'proportional',
seed.val = seed.val.set
)
myBiomodEM_semibalanus_baseline# Set paths
table_path <- "Data/Processed_Data/Semi.bal/Ensemble-baseline/"
figure_path <- "Figures/Semi.bal/Ensemble-baseline/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_semibalanus_baseline)
var_imp <- get_variables_importance(myBiomodEM_semibalanus_baseline)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_baseline, group.by = 'full.name')plot <- eval_calib$plot + theme_classic() + labs(colour = "Type")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_semibalanus_baseline, dataset = "evaluation")ensemble_eval_plot_SB_BASE <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_SB_BASE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_semibalanus_baseline, group.by = c('full.name', 'full.name'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Type",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 10, height = 5)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_baseline, c('expl.var', 'full.name', 'full.name'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_semibalanus_baseline, group.by = c('expl.var', 'algo', 'merged.by.run'))plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Response curves
models_to_plot <- models_to_plot <- get_built_models(myBiomodEM_semibalanus_baseline)[c(1, 6, 7)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_baseline,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_baseline,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_baseline,
models.chosen = get_built_models(myBiomodEM_semibalanus_baseline)[7],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)Semibalanus assessment figures
SB_assessment_plot <- cowplot::plot_grid(
ensemble_eval_plot_SB_CO,
ensemble_eval_plot_SB_CE,
algorithm_cal_plot_SB_CO,
algorithm_cal_plot_SB_CE,
algorithm_eval_plot_SB_CO,
algorithm_eval_plot_SB_CE,
ncol = 2,
labels = c("a) Ensemble (GBIF + GBIF)",
"b) Ensemble (GBIF-eDNA)",
"c) Algorithms (Calibration, GBIF + GBIF)",
"d) Algorithms (Calibration, GBIF-eDNA)",
"e) Algorithms (Evaluation, GBIF + GBIF)",
"f) Algorithms (Evaluation, GBIF-eDNA)"),
align = "v",
label_x = 0, # left align labels (0 = far left, 1 = far right)
label_y = 1, # push labels to the very top
hjust = 0, # left-justify text relative to x-position
vjust = 1, # align with top edge
label_size = 10
)
SB_assessment_plot# Save the plot
ggsave(SB_assessment_plot, filename = "Figures/SB_assessment_plot.png", width = 12, height = 15, dpi = 300)Model 4 (Chthamalus - GBIF + GBIF)
set.seed(seed.val.set)
# Model single models
myBiomodModelOut_chthamalus_GBIF <- BIOMOD_Modeling(
bm.format = myBiomodData_chthamalus_GBIF,
models = c('GAM','GLM','RF','MAXENT'),
modeling.id = "chthamalus_GBIF_GBIF",
CV.strategy = 'random',
CV.nb.rep = 2,
CV.perc = 0.7,
OPT.strategy = 'default',
var.import = 2,
metric.eval = c('TSS', 'ROC'),
seed.val = seed.val.set
)
get_options(myBiomodModelOut_chthamalus_GBIF)
myBiomodModelOut_chthamalus_GBIF# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Individual-GBIF-GBIF/"
figure_path <- "Figures/Chtham.mont/Individual-GBIF-GBIF/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_chthamalus_GBIF)
var_imp <- get_variables_importance(myBiomodModelOut_chthamalus_GBIF)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_GBIF, dataset = "calibration")algorithm_cal_plot_CM_CO <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_CM_CO, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_GBIF, dataset = "evaluation")algorithm_eval_plot_CM_CO <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_CM_CO, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF, group.by = c('algo', 'algo'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)
# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF, group.by = c('algo', 'run'))plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF, group.by = c('expl.var', 'algo', 'algo'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF, group.by = c('expl.var', 'algo', 'run'))plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)
# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_chthamalus_GBIF)[c(1:3, 12:14)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF,
models.chosen = get_built_models(myBiomodModelOut_chthamalus_GBIF)[3],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)set.seed(seed.val.set)
# Model ensemble models (takes around 30 mins)
myBiomodEM_chthamalus_GBIF_GBIF <- BIOMOD_EnsembleModeling(
bm.mod = myBiomodModelOut_chthamalus_GBIF,
models.chosen = 'all',
em.by = 'all',
em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
metric.select = c('TSS', 'ROC'),
metric.select.thresh = c(TSS_thres, ROC_thres),
metric.eval = c('TSS', 'ROC'),
var.import = 2,
EMci.alpha = 0.05,
EMwmean.decay = 'proportional',
seed.val = seed.val.set
)
myBiomodEM_chthamalus_GBIF_GBIF# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Ensemble-GBIF-GBIF/"
figure_path <- "Figures/Chtham.mont/Ensemble-GBIF-GBIF/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_chthamalus_GBIF_GBIF)
var_imp <- get_variables_importance(myBiomodEM_chthamalus_GBIF_GBIF)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, group.by = 'full.name')plot <- eval_calib$plot + theme_classic() + labs(colour = "Type")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 10, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, dataset = "evaluation")ensemble_eval_plot_CM_CO <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_CM_CO, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, group.by = c('full.name', 'full.name'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Type",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, c('expl.var', 'full.name', 'full.name'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Type")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_GBIF, group.by = c('expl.var', 'algo', 'merged.by.run'))plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Type")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Response curves
models_to_plot <- get_built_models(myBiomodEM_chthamalus_GBIF_GBIF)[c(1, 6, 7)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_GBIF,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_GBIF,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_GBIF,
models.chosen = get_built_models(myBiomodEM_chthamalus_GBIF_GBIF)[7],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)Model 5 (Chthamalus - GBIF + eDNA)
set.seed(seed.val.set)
# Model single models
myBiomodModelOut_chthamalus_GBIF_eDNA <- BIOMOD_Modeling(
bm.format = myBiomodData_chthamalus_GBIF_eDNA,
models = c('GAM','GLM','RF','MAXENT'),
modeling.id = "chthamalus_GBIF_eDNA",
CV.strategy = 'random',
CV.nb.rep = 2,
CV.perc = 0.7,
OPT.strategy = 'default',
var.import = 2,
metric.eval = c('TSS', 'ROC'),
seed.val = seed.val.set
)
get_options(myBiomodModelOut_chthamalus_GBIF_eDNA)
myBiomodModelOut_chthamalus_GBIF_eDNA# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Individual-GBIF-eDNA/"
figure_path <- "Figures/Chtham.mont/Individual-GBIF-eDNA/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_chthamalus_GBIF_eDNA)
var_imp <- get_variables_importance(myBiomodModelOut_chthamalus_GBIF_eDNA)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, dataset = "calibration")algorithm_cal_plot_CM_CE <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_CM_CE, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, dataset = "evaluation")algorithm_eval_plot_CM_CE <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_CM_CE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, group.by = c('algo', 'algo'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)
# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, group.by = c('algo', 'run'))plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'algo'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'run'))plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)
# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_chthamalus_GBIF_eDNA)[c(1:3, 12:14)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA,
models.chosen = get_built_models(myBiomodModelOut_chthamalus_GBIF_eDNA)[3],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)set.seed(seed.val.set)
# Model ensemble models (takes around 30 mins)
myBiomodEM_chthamalus_GBIF_eDNA <- BIOMOD_EnsembleModeling(
bm.mod = myBiomodModelOut_chthamalus_GBIF_eDNA,
models.chosen = 'all',
em.by = 'all',
em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
metric.select = c('TSS', 'ROC'),
metric.select.thresh = c(TSS_thres, ROC_thres),
metric.eval = c('TSS', 'ROC'),
var.import = 2,
EMci.alpha = 0.05,
EMwmean.decay = 'proportional',
seed.val = seed.val.set
)
myBiomodEM_chthamalus_GBIF_eDNA# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Ensemble-GBIF-eDNA/"
figure_path <- "Figures/Chtham.mont/Ensemble-GBIF-eDNA/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_chthamalus_GBIF_eDNA)
var_imp <- get_variables_importance(myBiomodEM_chthamalus_GBIF_eDNA)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, group.by = 'full.name')plot <- eval_calib$plot + theme_classic() + labs(colour = "Type")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 10, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, dataset = "evaluation")ensemble_eval_plot_CM_CE <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_CM_CE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, group.by = c('full.name', 'full.name'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Type",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 10, height = 5)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, c('expl.var', 'full.name', 'full.name'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Type")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_GBIF_eDNA, group.by = c('expl.var', 'algo', 'merged.by.run'))plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Type")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Response curves
models_to_plot <- get_built_models(myBiomodEM_chthamalus_GBIF_eDNA)[c(1, 6, 7)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_eDNA,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_eDNA,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_eDNA,
models.chosen = get_built_models(myBiomodEM_chthamalus_GBIF_eDNA)[7],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)Model 6 (CM - baseline)
set.seed(seed.val.set)
# Model single models
myBiomodModelOut_chthamalus_baseline <- BIOMOD_Modeling(
bm.format = myBiomodData_chthamalus_baseline,
models = c('GAM','GLM','RF','MAXENT'),
modeling.id = "chthamalus_baseline",
CV.strategy = 'random',
CV.nb.rep = 2,
CV.perc = 0.7,
OPT.strategy = 'default',
var.import = 2,
metric.eval = c('TSS', 'ROC'),
seed.val = seed.val.set
)
get_options(myBiomodModelOut_chthamalus_baseline)
myBiomodModelOut_chthamalus_baseline# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Individual-baseline/"
figure_path <- "Figures/Chtham.mont/Individual-baseline/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodModelOut_chthamalus_baseline)
var_imp <- get_variables_importance(myBiomodModelOut_chthamalus_baseline)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_baseline, dataset = "calibration")algorithm_cal_plot_cm_BASE <- eval_calib$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(algorithm_cal_plot_cm_BASE, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodModelOut_chthamalus_baseline, dataset = "evaluation")algorithm_eval_plot_cm_BASE <- eval_eval$plot + theme_classic() + labs(colour = "Algorithm")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(algorithm_eval_plot_cm_BASE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_baseline, group.by = c('algo', 'algo'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 7, height = 5)
# calibration boxplots - by run
eval_boxplot_algo_run <- bm_PlotEvalBoxplot(bm.out = myBiomodModelOut_chthamalus_baseline, group.by = c('algo', 'run'))plot <- eval_boxplot_algo_run$plot + theme_classic() + labs(fill = "Algorithm",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot_run.png"), width = 8, height = 6)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_baseline, group.by = c('expl.var', 'algo', 'algo'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Algorithm")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Variable importance boxplots - by run
val_importance_boxplot_run <- bm_PlotVarImpBoxplot(bm.out = myBiomodModelOut_chthamalus_baseline, group.by = c('expl.var', 'algo', 'run'))plot <- val_importance_boxplot_run$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo_Run.png"), width = 8, height = 6)
# Response curves
models_to_plot <- get_built_models(myBiomodModelOut_chthamalus_baseline)[c(1:3, 12:14)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_baseline,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_baseline,
models.chosen = get_built_models(myBiomodModelOut_chthamalus_baseline)[3],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 13, height = 7)set.seed(seed.val.set)
# Model ensemble models
myBiomodEM_chthamalus_baseline <- BIOMOD_EnsembleModeling(
bm.mod = myBiomodModelOut_chthamalus_baseline,
models.chosen = 'all',
em.by = 'all',
em.algo = c('EMmean', 'EMcv', 'EMci', 'EMca', 'EMwmean'),
metric.select = c('TSS', 'ROC'),
metric.select.thresh = c(TSS_thres, ROC_thres),
metric.eval = c('TSS', 'ROC'),
var.import = 2,
EMci.alpha = 0.05,
EMwmean.decay = 'proportional',
seed.val = seed.val.set
)
myBiomodEM_chthamalus_baseline# Set paths
table_path <- "Data/Processed_Data/Chtham.mont/Ensemble-baseline/"
figure_path <- "Figures/Chtham.mont/Ensemble-baseline/"
# Create folders if they don't exist
if (!dir.exists(table_path)) dir.create(table_path, recursive = TRUE)
if (!dir.exists(figure_path)) dir.create(figure_path, recursive = TRUE)
# Get evaluation scores & variable importance tables
eval_scores <- get_evaluations(myBiomodEM_chthamalus_baseline)
var_imp <- get_variables_importance(myBiomodEM_chthamalus_baseline)
write.csv(eval_scores, paste0(table_path, "Evaluations_All.csv"), row.names = TRUE)
write.csv(var_imp, paste0(table_path, "VariablesImportance_All.csv"), row.names = TRUE)
# calibration means
eval_calib <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_baseline, group.by = 'full.name')plot <- eval_calib$plot + theme_classic() + labs(colour = "Type")
tab <- eval_calib$tab
write.csv(tab, paste0(table_path, "EvalMean_Calibration.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "EvalMean_Calibration.png"), width = 7, height = 5)
# evaluation means
eval_eval <- bm_PlotEvalMean(bm.out = myBiomodEM_chthamalus_baseline, dataset = "evaluation")ensemble_eval_plot_cm_BASE <- eval_eval$plot + theme_classic() + labs(colour = "Type")
tab <- eval_eval$tab
write.csv(tab, paste0(table_path, "EvalMean_Evaluation.csv"), row.names = TRUE)
ggsave(ensemble_eval_plot_cm_BASE, file = paste0(figure_path, "EvalMean_Evaluation.png"), width = 7, height = 5)
# calibration boxplots - overall
# table produced here same as above, already saved
eval_boxplot_algo <- bm_PlotEvalBoxplot(bm.out = myBiomodEM_chthamalus_baseline, group.by = c('full.name', 'full.name'))plot <- eval_boxplot_algo$plot + theme_classic() + labs(fill = "Type",
y = "Calibration")
ggsave(plot, file = paste0(figure_path, "EvalMean_boxplot.png"), width = 10, height = 5)
# Variable importance boxplots - overall
# figures (table produced here same as above, already saved
val_importance_boxplot <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_baseline, c('expl.var', 'full.name', 'full.name'))plot <- val_importance_boxplot$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
val_importance_boxplot_merged <- bm_PlotVarImpBoxplot(bm.out = myBiomodEM_chthamalus_baseline, group.by = c('expl.var', 'algo', 'merged.by.run'))plot <- val_importance_boxplot_merged$plot + theme_classic() + labs(fill = "Model")
ggsave(plot, file = paste0(figure_path, "VarImpBoxplot_ExplVar_Algo.png"), width = 8, height = 8)
# Response curves
models_to_plot <- models_to_plot <- get_built_models(myBiomodEM_chthamalus_baseline)[c(1, 6, 7)]
# median
response_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_baseline,
models.chosen = models_to_plot,
fixed.var = 'median')tab <- response_median$tab
plot <- response_median$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "ResponseCurves_Median.png"), width = 13, height = 7)
# min
response_min<-bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_baseline,
models.chosen = models_to_plot,
fixed.var = 'min')tab <- response_min$tab
plot <- response_min$plot + theme_classic() + labs(colour = "Prediction Name")
write.csv(tab, paste0(table_path, "pred.vals.median.csv"), row.names = TRUE)
ggsave(plot, file = paste0(figure_path, "pred.vals.median.png"), width = 13, height = 7)
# heat map - median
response_heatmap_median <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_baseline,
models.chosen = get_built_models(myBiomodEM_chthamalus_baseline)[7],
fixed.var = 'median',
do.bivariate = TRUE)plot <- response_heatmap_median$plot + theme_classic()
ggsave(plot, file = paste0(figure_path, "ResponseCurve_Bivariate_Median.png"), width = 15, height = 7)Chthamalus assessment figures
CM_assessment_plot <- cowplot::plot_grid(
ensemble_eval_plot_CM_CO,
ensemble_eval_plot_CM_CE,
algorithm_cal_plot_CM_CO,
algorithm_cal_plot_CM_CE,
algorithm_eval_plot_CM_CO,
algorithm_eval_plot_CM_CE,
ncol = 2,
labels = c(
"a) Ensemble (GBIF + GBIF)",
"b) Ensemble (GBIF-eDNA)",
"c) Algorithms (Calibration, GBIF + GBIF)",
"d) Algorithms (Calibration, GBIF-eDNA)",
"e) Algorithms (Evaluation, GBIF + GBIF)",
"f) Algorithms (Evaluation, GBIF-eDNA)"
),
align = "v",
label_x = 0, # left align labels (0 = far left, 1 = far right)
label_y = 1, # push labels to the very top
hjust = 0, # left-justify text relative to x-position
vjust = 1, # align with top edge
label_size = 10
)
CM_assessment_plot# Save the plot
ggsave(CM_assessment_plot, filename = "Figures/CM_assessment_plot.png", width = 12, height = 15, dpi = 300)Current projections
We are going to run projections and mask our rocky substrate layer over projections. This ensures that any areas without rocky habitats won’t be included in our final maps.
Projection 1 (Semibalanus - GBIF + GBIF)
#just using mean models from now on
myBiomodEMProj_semibalanus_visual_GBIF_GBIF <- BIOMOD_EnsembleForecasting(
bm.em = myBiomodEM_semibalanus_GBIF_GBIF,
proj.name = 'CurrentEM.semibalanus.GBIF',
new.env = enviro_vars_current,
models.chosen = c(
'Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
'Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo')
#metric.binary = 'all',
#metric.filter = 'all'
)
myBiomodEMProj_semibalanus_visual_GBIF_GBIF
# visualise
proj_semibalanus_visual <- get_predictions(myBiomodEMProj_semibalanus_visual_GBIF_GBIF) # returns a rasterLayer
plot(proj_semibalanus_visual)# after rocky mask
proj_semibalanus_visual_masked <- mask(proj_semibalanus_visual, expanded_rock)
plot(proj_semibalanus_visual_masked)# Choose the first layer
r_layer <- proj_semibalanus_visual_masked[[1]]
# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
filter(!(x > -8.2 & x < -5.3 &
y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland
filter(!(x > -8.0 & x < -6.0 &
y > 55.1 & y < 55.6))
colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_SB_visual <- r_df
# plot
semibalanus_visual_current_plot_TSS <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
scale_fill_viridis_c(option = "viridis",
na.value = "transparent",
limits = c(0, 1)) +
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_minimal(base_size = 14) +
labs(fill = NULL) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"), # ~80% of plotting height
barwidth = unit(0.02, "npc"), # slim vertical bar
title.position = "top"
)
) +
theme(
legend.position = "right", # place bar to the right
axis.title.x = element_blank(), # remove x-axis title
axis.title.y = element_blank(), # remove y-axis title
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA)
)
semibalanus_visual_current_plot_TSSProjection 2 (Semibalanus - GBIF + eDNA)
myBiomodEMProj_semibalanus_GBIF_eDNA <- BIOMOD_EnsembleForecasting(
bm.em = myBiomodEM_semibalanus_GBIF_eDNA,
proj.name = 'CurrentEM.semibalanus.GBIF.eDNA',
new.env = enviro_vars_current,
models.chosen = c(
'Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
'Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo')
#metric.binary = 'all',
#metric.filter = 'all'
)
myBiomodEMProj_semibalanus_GBIF_eDNA
# visualise
proj_semibalanus_combined <- get_predictions(myBiomodEMProj_semibalanus_GBIF_eDNA) # returns a rasterLayer
plot(proj_semibalanus_combined)# after rocky mask
proj_semibalanus_combined_masked <- mask(proj_semibalanus_combined, expanded_rock)
plot(proj_semibalanus_combined_masked)# Choose the first layer
r_layer <- proj_semibalanus_combined_masked[[1]]
# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
filter(!(x > -8.2 & x < -5.3 &
y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland
filter(!(x > -8.0 & x < -6.0 &
y > 55.1 & y < 55.6))
colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_SB_combined <- r_df
# plot
semibalanus_combined_current_plot_TSS <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
scale_fill_viridis_c(option = "viridis",
na.value = "transparent",
limits = c(0, 1)) +
labs(fill = NULL) +
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_minimal(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"), # ~80% of plotting height
barwidth = unit(0.02, "npc") # slim vertical bar
)
) +
theme(
legend.position = "right", # put the colorbar on the right
axis.title.x = element_blank(), # remove x-axis title
axis.title.y = element_blank(), # remove y-axis title
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA)
)
semibalanus_combined_current_plot_TSSProjection 3 (Chthamalus - GBIF + GBIF)
myBiomodEMProj_chthamalus_GBIF_GBIF <- BIOMOD_EnsembleForecasting(
bm.em = myBiomodEM_chthamalus_GBIF_GBIF,
proj.name = 'CurrentEM.chthamalus.GBIF',
new.env = enviro_vars_current,
models.chosen = c(
'Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
'Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo')
#metric.binary = 'all',
#metric.filter = 'all'
)
myBiomodEMProj_chthamalus_GBIF_GBIF
# visualise
proj_chthamalus_visual <- get_predictions(myBiomodEMProj_chthamalus_GBIF_GBIF) # returns a rasterLayer
plot(proj_chthamalus_visual)# after rocky mask
proj_chthamalus_visual_masked <- mask(proj_chthamalus_visual, expanded_rock)
plot(proj_chthamalus_visual_masked)# Choose the first layer
r_layer <- proj_chthamalus_visual_masked[[1]]
# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
filter(!(x > -8.2 & x < -5.3 &
y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland
filter(!(x > -8.0 & x < -6.0 &
y > 55.1 & y < 55.6))
colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_CM_visual <- r_df
# plot
chthamalus_visual_current_plot_TSS <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
scale_fill_viridis_c(option = "viridis",
na.value = "transparent",
limits = c(0, 1)) +
labs(fill = NULL) +
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_minimal(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"), # ~80% of plotting height
barwidth = unit(0.02, "npc") # slim vertical bar
)
) +
theme(
legend.position = "right", # show the colorbar on the right
axis.title.x = element_blank(), # remove x-axis title
axis.title.y = element_blank(), # remove y-axis title
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA)
)
chthamalus_visual_current_plot_TSSProjection 4 (Chthamalus - GBIF + eDNA)
myBiomodEMProj_chthamalus_GBIF_eDNA <- BIOMOD_EnsembleForecasting(
bm.em = myBiomodEM_chthamalus_GBIF_eDNA,
proj.name = 'CurrentEM.chthamalus.GBIF.eDNA',
new.env = enviro_vars_current,
models.chosen = c(
'Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
'Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo')
#metric.binary = 'all',
#metric.filter = 'all'
)
myBiomodEMProj_chthamalus_GBIF_eDNA
# visualise
proj_chthamalus_combined <- get_predictions(myBiomodEMProj_chthamalus_GBIF_eDNA) # returns a rasterLayer
plot(proj_chthamalus_combined)# after rocky mask
proj_chthamalus_combined_masked <- mask(proj_chthamalus_combined, expanded_rock)
plot(proj_chthamalus_combined_masked)# Choose the first layer
r_layer <- proj_chthamalus_combined_masked[[1]]
# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
filter(!(x > -8.2 & x < -5.3 &
y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland
filter(!(x > -8.0 & x < -6.0 &
y > 55.1 & y < 55.6))
colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_CM_combined <- r_df
#plot
chthamalus_combined_current_plot_TSS <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
scale_fill_viridis_c(option = "viridis",
na.value = "transparent",
limits = c(0, 1)) +
labs(fill = NULL) + # 🚨 remove legend title
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_minimal(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"), # ~80% of plot height
barwidth = unit(0.02, "npc") # slim vertical bar
)
) +
theme(
legend.position = "right", # show colorbar on the right
axis.title.x = element_blank(), # remove x-axis title
axis.title.y = element_blank(), # remove y-axis title
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA)
)
chthamalus_combined_current_plot_TSSDifference maps
#join the data together
colnames(r_df_SB_visual) <- c("x", "y", "suitability_visual", "suitability_divide_visual")
colnames(r_df_SB_combined) <- c("x", "y", "suitability_combined", "suitability_divide_combined")
r_df_SB_diff <- left_join(r_df_SB_visual, r_df_SB_combined)
r_df_SB_diff$diff <- r_df_SB_diff$suitability_divide_combined - r_df_SB_diff$suitability_divide_visual
mean(r_df_SB_diff$diff)[1] 0.005617137
# map
SB_diff_plot <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = r_df_SB_diff, aes(x = x, y = y, fill = diff)) +
scale_fill_gradient2(
low = "#F8333C", # negative values
mid = "white", # 0
high = "#0273CA", # positive values
midpoint = 0, # center the scale at 0
na.value = "transparent",
breaks = c(min(r_df_SB_diff$diff, na.rm = TRUE), 0, max(r_df_SB_diff$diff, na.rm = TRUE)),
labels = c("Loss", "No change", "Gain")
) +
labs(fill = NULL) +
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_minimal(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"),
barwidth = unit(0.02, "npc")
)
) +
theme(
legend.position = "right",
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA)
)
SB_diff_plot#join the data together
colnames(r_df_CM_visual) <- c("x", "y", "suitability_visual", "suitability_divide_visual")
colnames(r_df_CM_combined) <- c("x", "y", "suitability_combined", "suitability_divide_combined")
r_df_CM_diff <- left_join(r_df_CM_visual, r_df_CM_combined)
r_df_CM_diff$diff <- r_df_CM_diff$suitability_divide_combined - r_df_CM_diff$suitability_divide_visual
mean(r_df_CM_diff$diff)[1] 0.0125705
# map
CM_diff_plot <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = r_df_CM_diff, aes(x = x, y = y, fill = diff)) +
scale_fill_gradient2(
low = "#F8333C", # negative values
mid = "white", # 0
high = "#0273CA", # positive values
midpoint = 0, # center the scale at 0
na.value = "transparent",
breaks = c(min(r_df_CM_diff$diff, na.rm = TRUE), 0, max(r_df_CM_diff$diff, na.rm = TRUE)),
labels = c("Loss", "No change", "Gain")
) +
labs(fill = NULL) +
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_minimal(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"),
barwidth = unit(0.02, "npc")
)
) +
theme(
legend.position = "right",
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA)
)
CM_diff_plotJoint maps
Let’s plot all our current projections together into one plot.
# Left-aligned species labels
sb_label <- ggdraw() +
draw_label("Semibalanus balanoides",
fontface = "italic",
size = 14,
hjust = 0, # left-aligned
x = 0.02) # move slightly from edge
cm_label <- ggdraw() +
draw_label("Chthamalus montagui",
fontface = "italic",
size = 14,
hjust = 0,
x = 0.02)
#top row
SB_row <- plot_grid(
semibalanus_visual_current_plot_TSS,
semibalanus_combined_current_plot_TSS,
SB_diff_plot,
ncol = 3,
labels = c("a) GBIF baseline + GBIF additional",
"b) GBIF baseline + eDNA",
"c) Difference"),
align = "hv",
label_x = 0, # x = 0 anchors to left edge
label_y = 1, # y = 1 anchors to top edge
hjust = 0, # left-justify long text
vjust = 1 # top-justify vertically
)
#bottom row
CM_row <- plot_grid(
chthamalus_visual_current_plot_TSS,
chthamalus_combined_current_plot_TSS,
CM_diff_plot,
ncol = 3,
labels = c("d) GBIF baseline + GBIF additional",
"e) GBIF baseline + eDNA",
"f) Difference"),
align = "hv",
label_x = 0, # x = 0 anchors to left edge
label_y = 1, # y = 1 anchors to top edge
hjust = 0, # left-justify long text
vjust = 1 # top-justify vertically
)
# Combine rows with labels (same layout as before)
current_plot_2 <- plot_grid(
sb_label, SB_row,
cm_label, CM_row,
ncol = 1,
rel_heights = c(0.08, 1, 0.08, 1)
)
current_plot_2# Save the plot
ggsave(current_plot_2, filename = "Figures/current_plot_2.png", width = 15, height = 13, dpi = 300)Forecasting change
Forecast 1 (Semibalanus - GBIF + GBIF)
myBiomodEMProjFuture_semibalanus_GBIF_GBIF <- BIOMOD_EnsembleForecasting(
bm.em = myBiomodEM_semibalanus_GBIF_GBIF,
proj.name = 'FutureEM.semibalanus.visual',
new.env = enviro_vars_future,
models.chosen = c(
'Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
"Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo"
)
#metric.binary = 'all',
#metric.filter = 'all'
)
-=-=-=-=-=-=-=-=-=-=-=-= Do Ensemble Models Projection -=-=-=-=-=-=-=-=-=-=-=-=
Creating suitable Workdir...
-=-=-=-=-=-=-=-=-=-=-=-=-= Do Single Models Projection -=-=-=-=-=-=-=-=-=-=-=-=-=
> Projecting Semibalanus.balanoides_PA1_RUN1_RF ...
> Projecting Semibalanus.balanoides_PA1_RUN1_MAXENT ...
> Projecting Semibalanus.balanoides_PA1_RUN2_RF ...
> Projecting Semibalanus.balanoides_PA1_RUN2_MAXENT ...
> Projecting Semibalanus.balanoides_PA1_RUN1_GAM ...
> Projecting Semibalanus.balanoides_PA1_RUN1_GLM ...
> Projecting Semibalanus.balanoides_PA1_RUN2_GAM ...
> Projecting Semibalanus.balanoides_PA1_RUN2_GLM ...
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
> Projecting Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo ...
> Projecting Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo ...
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
myBiomodEMProjFuture_semibalanus_GBIF_GBIF
-=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.projection.out -=-=-=-=-=-=-=-=-=-=-=-=-=-=
Projection directory : ./Semibalanus.balanoides/FutureEM.semibalanus.visual
sp.name : Semibalanus.balanoides
expl.var.names : ocean_temp ph wave_fetch
modeling.id : Semibalanus_GBIF_GBIF (
./Semibalanus.balanoides/Semibalanus.balanoides.Semibalanus_GBIF_GBIF.ensemble.models.out
)
models.projected :
Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo, Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
plot(myBiomodEMProjFuture_semibalanus_GBIF_GBIF)# visualise
future_semibalanus_visual <- get_predictions(myBiomodEMProjFuture_semibalanus_GBIF_GBIF) # returns a rasterLayer
plot(future_semibalanus_visual)# after rocky mask
future_semibalanus_visual_masked <- mask(future_semibalanus_visual, expanded_rock)
plot(future_semibalanus_visual_masked)# Choose the first layer
r_layer <- future_semibalanus_visual_masked[[1]]
# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
filter(!(x > -8.2 & x < -5.3 &
y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland
filter(!(x > -8.0 & x < -6.0 &
y > 55.1 & y < 55.6))
colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_SM_visual_future <- r_df
#plot
semibalanus_visual_future_plot <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
scale_fill_viridis_c(option = "viridis",
na.value = "transparent",
limits = c(0, 1)) +
labs(fill = NULL) + # remove legend title
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_minimal(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"), # ~80% of plot height
barwidth = unit(0.02, "npc") # slim vertical bar
)
) +
theme(
legend.position = "right", # show colorbar on the right
axis.title.x = element_blank(), # remove x-axis title
axis.title.y = element_blank(), # remove y-axis title
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA)
)
semibalanus_visual_future_plotForecast 2 (Semibalanus - GBIF + eDNA)
myBiomodEMProjFuture_semibalanus_GBIF_eDNA <- BIOMOD_EnsembleForecasting(
bm.em = myBiomodEM_semibalanus_GBIF_eDNA,
proj.name = 'FutureEM.semibalanus.combined',
new.env = enviro_vars_future,
models.chosen = c(
'Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
"Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo"
)
#metric.binary = 'all',
#metric.filter = 'all'
)
-=-=-=-=-=-=-=-=-=-=-=-= Do Ensemble Models Projection -=-=-=-=-=-=-=-=-=-=-=-=
Creating suitable Workdir...
-=-=-=-=-=-=-=-=-=-=-=-=-= Do Single Models Projection -=-=-=-=-=-=-=-=-=-=-=-=-=
> Projecting Semibalanus.balanoides_PA1_RUN1_RF ...
> Projecting Semibalanus.balanoides_PA1_RUN1_MAXENT ...
> Projecting Semibalanus.balanoides_PA1_RUN2_RF ...
> Projecting Semibalanus.balanoides_PA1_RUN2_MAXENT ...
> Projecting Semibalanus.balanoides_PA1_RUN1_GAM ...
> Projecting Semibalanus.balanoides_PA1_RUN1_GLM ...
> Projecting Semibalanus.balanoides_PA1_RUN2_GAM ...
> Projecting Semibalanus.balanoides_PA1_RUN2_GLM ...
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
> Projecting Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo ...
> Projecting Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo ...
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
myBiomodEMProjFuture_semibalanus_GBIF_eDNA
-=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.projection.out -=-=-=-=-=-=-=-=-=-=-=-=-=-=
Projection directory : ./Semibalanus.balanoides/FutureEM.semibalanus.combined
sp.name : Semibalanus.balanoides
expl.var.names : ocean_temp ph wave_fetch
modeling.id : Semibalanus_GBIF_eDNA (
./Semibalanus.balanoides/Semibalanus.balanoides.Semibalanus_GBIF_eDNA.ensemble.models.out
)
models.projected :
Semibalanus.balanoides_EMmeanByTSS_mergedData_mergedRun_mergedAlgo, Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
plot(myBiomodEMProjFuture_semibalanus_GBIF_eDNA)# visualise
future_semibalanus_combined <- get_predictions(myBiomodEMProjFuture_semibalanus_GBIF_eDNA) # returns a rasterLayer
plot(future_semibalanus_combined)# after rocky mask
future_semibalanus_combined_masked <- mask(future_semibalanus_combined, expanded_rock)
plot(future_semibalanus_combined_masked)# Choose the first layer
r_layer <- future_semibalanus_combined_masked[[1]]
# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
filter(!(x > -8.2 & x < -5.3 &
y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland
filter(!(x > -8.0 & x < -6.0 &
y > 55.1 & y < 55.6))
colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_future_SB_combined_masked <- r_df
#plot
semibalanus_combined_future_plot <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
scale_fill_viridis_c(option = "viridis",
na.value = "transparent",
limits = c(0, 1)) +
labs(fill = NULL) + # remove legend title
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_minimal(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"), # ~80% of plot height
barwidth = unit(0.02, "npc") # slim vertical bar
)
) +
theme(
legend.position = "right", # show colorbar on the right
axis.title.x = element_blank(), # remove x-axis title
axis.title.y = element_blank(), # remove y-axis title
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA)
)
semibalanus_combined_future_plotForecast 3 (Chthamalus - GBIF + GBIF)
myBiomodEMProjFuture_chthamalus_GBIF_GBIF<- BIOMOD_EnsembleForecasting(
bm.em = myBiomodEM_chthamalus_GBIF_GBIF,
proj.name = 'FutureEM.chthamalus.visual',
new.env = enviro_vars_future,
models.chosen = c(
'Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
"Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo"
),
metric.binary = 'all',
metric.filter = 'all'
)
-=-=-=-=-=-=-=-=-=-=-=-= Do Ensemble Models Projection -=-=-=-=-=-=-=-=-=-=-=-=
Creating suitable Workdir...
-=-=-=-=-=-=-=-=-=-=-=-=-= Do Single Models Projection -=-=-=-=-=-=-=-=-=-=-=-=-=
> Projecting Chthamalus.montagui_PA1_RUN1_GAM ...
> Projecting Chthamalus.montagui_PA1_RUN1_GLM ...
> Projecting Chthamalus.montagui_PA1_RUN1_RF ...
> Projecting Chthamalus.montagui_PA1_RUN1_MAXENT ...
> Projecting Chthamalus.montagui_PA1_RUN2_GAM ...
> Projecting Chthamalus.montagui_PA1_RUN2_GLM ...
> Projecting Chthamalus.montagui_PA1_RUN2_RF ...
> Projecting Chthamalus.montagui_PA1_RUN2_MAXENT ...
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
> Projecting Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo ...
> Projecting Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo ...
> Building TSS binaries / filtered
> Building ROC binaries / filtered
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
myBiomodEMProjFuture_chthamalus_GBIF_GBIF
-=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.projection.out -=-=-=-=-=-=-=-=-=-=-=-=-=-=
Projection directory : ./Chthamalus.montagui/FutureEM.chthamalus.visual
sp.name : Chthamalus.montagui
expl.var.names : ocean_temp ph wave_fetch
modeling.id : chthamalus_GBIF_GBIF (
./Chthamalus.montagui/Chthamalus.montagui.chthamalus_GBIF_GBIF.ensemble.models.out
)
models.projected :
Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo, Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo
available binary projection : TSS, ROC
available filtered projection : TSS, ROC
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
plot(myBiomodEMProjFuture_chthamalus_GBIF_GBIF)# visualise
future_chthamalus_visual <- get_predictions(myBiomodEMProjFuture_chthamalus_GBIF_GBIF) # returns a rasterLayer
plot(future_chthamalus_visual)# after rocky mask
future_chthamalus_visual_masked <- mask(future_chthamalus_visual, expanded_rock)
plot(future_chthamalus_visual_masked)# Choose the first layer
r_layer <- future_chthamalus_visual_masked[[1]]
# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
filter(!(x > -8.2 & x < -5.3 &
y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland
filter(!(x > -8.0 & x < -6.0 &
y > 55.1 & y < 55.6))
colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_future_CM_visual_masked <- r_df
#plot
chthamalus_visual_future_plot <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
scale_fill_viridis_c(option = "viridis",
na.value = "transparent",
limits = c(0, 1)) +
labs(fill = NULL) + # remove legend title
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_minimal(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"), # ~80% of plot height
barwidth = unit(0.02, "npc") # slim vertical bar
)
) +
theme(
legend.position = "right", # show colorbar on the right
axis.title.x = element_blank(), # remove x-axis title
axis.title.y = element_blank(), # remove y-axis title
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA)
)
chthamalus_visual_future_plotForecast 4 (Chthamalus - GBIF + eDNA)
myBiomodEMProjFuture_chthamalus_GBIF_eDNA<- BIOMOD_EnsembleForecasting(
bm.em = myBiomodEM_chthamalus_GBIF_eDNA,
proj.name = 'FutureEM.chthamalus.combined',
new.env = enviro_vars_future,
models.chosen = c(
'Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo',
"Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo"
),
metric.binary = 'all',
metric.filter = 'all'
)
-=-=-=-=-=-=-=-=-=-=-=-= Do Ensemble Models Projection -=-=-=-=-=-=-=-=-=-=-=-=
Creating suitable Workdir...
-=-=-=-=-=-=-=-=-=-=-=-=-= Do Single Models Projection -=-=-=-=-=-=-=-=-=-=-=-=-=
> Projecting Chthamalus.montagui_PA1_RUN1_GAM ...
> Projecting Chthamalus.montagui_PA1_RUN1_GLM ...
> Projecting Chthamalus.montagui_PA1_RUN1_RF ...
> Projecting Chthamalus.montagui_PA1_RUN1_MAXENT ...
> Projecting Chthamalus.montagui_PA1_RUN2_GAM ...
> Projecting Chthamalus.montagui_PA1_RUN2_GLM ...
> Projecting Chthamalus.montagui_PA1_RUN2_RF ...
> Projecting Chthamalus.montagui_PA1_RUN2_MAXENT ...
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
> Projecting Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo ...
> Projecting Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo ...
> Building TSS binaries / filtered
> Building ROC binaries / filtered
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-= Done -=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
myBiomodEMProjFuture_chthamalus_GBIF_eDNA
-=-=-=-=-=-=-=-=-=-=-=-=-=-= BIOMOD.projection.out -=-=-=-=-=-=-=-=-=-=-=-=-=-=
Projection directory : ./Chthamalus.montagui/FutureEM.chthamalus.combined
sp.name : Chthamalus.montagui
expl.var.names : ocean_temp ph wave_fetch
modeling.id : chthamalus_GBIF_eDNA (
./Chthamalus.montagui/Chthamalus.montagui.chthamalus_GBIF_eDNA.ensemble.models.out
)
models.projected :
Chthamalus.montagui_EMmeanByTSS_mergedData_mergedRun_mergedAlgo, Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo
available binary projection : TSS, ROC
available filtered projection : TSS, ROC
-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=
plot(myBiomodEMProjFuture_chthamalus_GBIF_eDNA)# visualise
future_chthamalus_combined <- get_predictions(myBiomodEMProjFuture_chthamalus_GBIF_eDNA) # returns a rasterLayer
plot(future_chthamalus_combined)# after rocky mask
future_chthamalus_combined_masked <- mask(future_chthamalus_combined, expanded_rock)
plot(future_chthamalus_combined_masked)# Choose the first layer
r_layer <- future_chthamalus_combined_masked[[1]]
# Convert SpatRaster to data frame for ggplot
r_df <- as.data.frame(r_layer, xy = TRUE) %>%
filter(!(x > -8.2 & x < -5.3 &
y > 53.9 & y < 55.2)) %>% # filter out Northern Ireland
filter(!(x > -8.0 & x < -6.0 &
y > 55.1 & y < 55.6))
colnames(r_df) <- c("x", "y", "suitability")
r_df$suitability_divide = r_df$suitability / 1000
r_df_future_CM_combined_masked <- r_df
#plot
chthamalus_combined_future_plot <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = r_df, aes(x = x, y = y, fill = suitability_divide)) +
scale_fill_viridis_c(option = "viridis",
na.value = "transparent",
limits = c(0, 1)) +
labs(fill = NULL) + # remove legend title
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_minimal(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"), # ~80% of plot height
barwidth = unit(0.02, "npc") # slim vertical bar
)
) +
theme(
legend.position = "right", # show colorbar on the right
axis.title.x = element_blank(), # remove x-axis title
axis.title.y = element_blank(), # remove y-axis title
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA)
)
chthamalus_combined_future_plotDifference maps
#join the data together (future)
colnames(r_df_SM_visual_future) <- c("x", "y", "suitability_visual", "suitability_divide_visual")
colnames(r_df_future_SB_combined_masked) <- c("x", "y", "suitability_combined", "suitability_divide_combined")
r_df_SB_diff_future <- left_join(r_df_SM_visual_future, r_df_future_SB_combined_masked)
r_df_SB_diff_future$diff <- r_df_SB_diff_future$suitability_divide_combined - r_df_SB_diff_future$suitability_divide_visual
mean(r_df_SB_diff_future$diff)[1] -0.05492191
# now for current vs future
SB_future_df <- r_df_future_SB_combined_masked %>% select(c(x,y,suitability_divide_combined))
SB_current_df <- r_df_SB_combined %>% select(c(x, y , suitability_divide_combined))
colnames(SB_future_df) <- c("x", "y", "suitability_future")
colnames(SB_current_df) <- c("x", "y", "suitability_current")
SB_current_future_diff <- left_join(SB_current_df, SB_future_df)
SB_current_future_diff$diff <- SB_current_future_diff$suitability_future - SB_current_future_diff$suitability_current
head(SB_current_future_diff) x y suitability_current suitability_future diff
1 -1.0749999 60.725 0.100 0.100 0.000
2 -0.9749999 60.725 0.182 0.092 -0.090
3 -0.8249999 60.725 0.258 0.114 -0.144
4 -0.7749999 60.725 0.124 0.069 -0.055
5 -0.7249999 60.725 0.070 0.131 0.061
6 -0.9749999 60.675 0.175 0.058 -0.117
mean(SB_current_future_diff$diff)[1] -0.1877744
# map
SB_diff_plot_future <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = SB_current_future_diff, aes(x = x, y = y, fill = diff)) +
scale_fill_gradient2(
low = "#F8333C", # negative values
mid = "white", # 0
high = "#0273CA", # positive values
midpoint = 0, # center the scale at 0
na.value = "transparent",
breaks = c(min(SB_current_future_diff$diff, na.rm = TRUE), 0, max(SB_current_future_diff$diff, na.rm = TRUE)),
labels = c("Loss", "No change", "Gain")
) +
labs(fill = NULL#,
#title = "Semibalanus balanoides"
) +
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_bw(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"),
barwidth = unit(0.02, "npc")
)
) +
theme(
legend.position = "right",
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA),
plot.title = element_text(size = 10, face = "italic")
)
SB_diff_plot_future#join the data together (future)
colnames(r_df_future_CM_visual_masked) <- c("x", "y", "suitability_visual", "suitability_divide_visual")
colnames(r_df_future_CM_combined_masked) <- c("x", "y", "suitability_combined", "suitability_divide_combined")
r_df_CM_diff_future <- left_join(r_df_future_CM_visual_masked, r_df_future_CM_combined_masked)
r_df_CM_diff_future$diff <- r_df_CM_diff_future$suitability_divide_combined - r_df_CM_diff_future$suitability_divide_visual
mean(r_df_CM_diff_future$diff)[1] -0.1413449
# now for current vs future
CM_future_df <- r_df_future_CM_combined_masked %>% select(c(x,y,suitability_divide_combined))
CM_current_df <- r_df_CM_combined %>% select(c(x, y , suitability_divide_combined))
colnames(CM_future_df) <- c("x", "y", "suitability_future")
colnames(CM_current_df) <- c("x", "y", "suitability_current")
CM_current_future_diff <- left_join(CM_current_df, CM_future_df)
CM_current_future_diff$diff <- CM_current_future_diff$suitability_future - CM_current_future_diff$suitability_current
head(CM_current_future_diff) x y suitability_current suitability_future diff
1 -1.0749999 60.725 0.023 0.620 0.597
2 -0.9749999 60.725 0.058 0.704 0.646
3 -0.8249999 60.725 0.046 0.683 0.637
4 -0.7749999 60.725 0.013 0.582 0.569
5 -0.7249999 60.725 0.009 0.598 0.589
6 -0.9749999 60.675 0.061 0.716 0.655
mean(CM_current_future_diff$diff)[1] 0.4080141
# map
CM_diff_plot_future <- ggplot() +
geom_sf(data = uk, fill = "grey98", color = "lightgray") +
geom_sf(data = other_maps, fill = "white", color = "grey92") +
geom_sf(data = northern_ireland, fill = "white", color = "grey92") +
geom_tile(data = CM_current_future_diff, aes(x = x, y = y, fill = diff)) +
scale_fill_gradient2(
low = "#F8333C", # negative values
mid = "white", # 0
high = "#0273CA", # positive values
midpoint = 0, # center the scale at 0
na.value = "transparent",
breaks = c(min(CM_current_future_diff$diff, na.rm = TRUE), 0, max(CM_current_future_diff$diff, na.rm = TRUE)),
labels = c("Loss", "No change", "Gain")
) +
labs(fill = NULL#,
#title = "Chthamalus montagui"
) +
coord_sf(
xlim = c(-8, 3),
ylim = c(49.7, 61),
expand = FALSE
) +
theme_bw(base_size = 14) +
guides(
fill = guide_colorbar(
barheight = unit(0.5, "npc"),
barwidth = unit(0.02, "npc")
)
) +
theme(
legend.position = "right",
axis.title.x = element_blank(),
axis.title.y = element_blank(),
panel.background = element_rect(fill = "white", colour = NA),
plot.background = element_rect(fill = "white", colour = NA),
plot.title = element_text(size = 10, face = "italic")
)
CM_diff_plot_futureJoint maps
future_projections_plot <- cowplot::plot_grid(
SB_diff_plot_future,
CM_diff_plot_future,
ncol = 2, align = "hv", label_fontface = "italic"
)
future_projections_plot# Save the plot
ggsave(future_projections_plot, filename = "Figures/future_projections_plot.png", width = 14, height = 11, dpi = 300)Lat slice plot
future_colour = "#FCAB10"
current_colour = "#2B9EB3"
#SB
# 1. Bin latitude (y) into 5 equal ranges
SB_current_future_diff <- SB_current_future_diff %>%
mutate(lat_bin = cut(y, breaks = 5))
# 2. Reshape to long format for ggplot
SB_long <- SB_current_future_diff %>%
pivot_longer(cols = starts_with("suitability"),
names_to = "scenario",
values_to = "suitability")
# 3. Compute mean suitability per latitude bin and scenario
SB_summary <- SB_long %>%
group_by(lat_bin, scenario) %>%
summarise(mean_suit = mean(suitability, na.rm = TRUE),
se = sd(suitability)/sqrt(n()),
sd = sd(suitability))
#add species
SB_summary$species = "Semibalanus balanoides"
SB_long$species = "Semibalanus balanoides"
#CM
# 1. Bin latitude (y) into 5 equal ranges
CM_current_future_diff <- CM_current_future_diff %>%
mutate(lat_bin = cut(y, breaks = 5))
# 2. Reshape to long format for ggplot
CM_long <- CM_current_future_diff %>%
pivot_longer(cols = starts_with("suitability"),
names_to = "scenario",
values_to = "suitability")
# 3. Compute mean suitability per latitude bin and scenario
CM_summary <- CM_long %>%
group_by(lat_bin, scenario) %>%
summarise(mean_suit = mean(suitability, na.rm = TRUE),
se = sd(suitability)/sqrt(n()),
sd = sd(suitability))
CM_summary$species = "Chthamalus montagui"
CM_long$species = "Chthamalus montagui"# long
long_both <- rbind(CM_long, SB_long)
lat_bin_summary <- rbind(SB_summary, CM_summary)arrow_data <- lat_bin_summary %>%
select(lat_bin, species, scenario, mean_suit) %>%
pivot_wider(names_from = scenario, values_from = mean_suit) %>%
rename(
current = suitability_current,
future = suitability_future
) %>%
mutate(
future = if_else(future < current, future + 0.02, future - 0.02)
)
# 1Ensure your main plotting dataset has species as a factor in the desired order
lat_bin_summary <- lat_bin_summary %>%
mutate(species = factor(species, levels = c("Semibalanus balanoides", "Chthamalus montagui")))
# 2 Ensure arrow data also respects the same factor levels
arrow_data <- arrow_data %>%
mutate(species = factor(species, levels = c("Semibalanus balanoides", "Chthamalus montagui")))
# 3 Plot
Lat_bin_line_plot <- ggplot(lat_bin_summary,
aes(
x = mean_suit,
y = lat_bin,
color = scenario,
shape = species
)) +
geom_segment(
data = arrow_data,
aes(x = current, xend = future, y = lat_bin, yend = lat_bin),
arrow = arrow(length = unit(0.2, "cm")),
color = "grey50"
) +
geom_pointrange(aes(ymin = mean_suit,
ymax = mean_suit),
size = 1) +
facet_wrap(~species, ncol = 1) + # facets follow factor order
theme_bw() +
theme(strip.text = element_text(face = "italic"),
legend.position = "right") +
labs(
x = "Suitability score",
y = "Latitude range",
color = "Climate scenario",
shape = "Species"
) +
scale_color_manual(
name = "Climate scenario",
values = c("suitability_current" = current_colour,
"suitability_future" = future_colour),
labels = c("suitability_current" = "Current",
"suitability_future" = "Future (2100)")
) +
scale_y_discrete(
labels = c(
"(49.9,52]" = "49.9 to 52°N",
"(52,54.2]" = "52 to 54.2°N",
"(54.2,56.4]" = "54.2 to 56.4°N",
"(56.4,58.6]" = "56.4 to 58.6°N",
"(58.6,60.7]" = "58.6 to 60.7°N"
))
Lat_bin_line_plot# Save the plot
ggsave(Lat_bin_line_plot, filename = "Figures/Lat_bin_line_plot.png", width = 8, height = 5, dpi = 300)Lat_bin_line_plot_SB <- ggplot(lat_bin_summary %>%
filter(species == "Semibalanus balanoides"),
aes(
x = mean_suit,
y = lat_bin,
color = scenario
)) +
geom_pointrange(aes(ymin = mean_suit,
ymax = mean_suit),
size = 1) +
geom_segment(
data = arrow_data %>% filter(species == "Semibalanus balanoides"),
aes(x = current, xend = future, y = lat_bin, yend = lat_bin),
arrow = arrow(length = unit(0.2, "cm")),
color = "grey50"
) +
theme_bw() +
theme(strip.text = element_text(face = "italic"),
legend.position = "right") +
labs(
x = "Suitability score",
y = "Latitude range",
color = "Climate scenario"
) +
scale_color_manual(
name = "Climate scenario",
values = c("suitability_current" = current_colour,
"suitability_future" = future_colour),
labels = c("suitability_current" = "Current",
"suitability_future" = "Future (2100)")
) +
scale_y_discrete(
labels = c(
"(49.9,52]" = "49.9 to 52°N",
"(52,54.2]" = "52 to 54.2°N",
"(54.2,56.4]" = "54.2 to 56.4°N",
"(56.4,58.6]" = "56.4 to 58.6°N",
"(58.6,60.7]" = "58.6 to 60.7°N"
))+ xlim(0,1)
Lat_bin_line_plot_SBLat_bin_line_plot_CM <- ggplot(lat_bin_summary %>%
filter(species == "Chthamalus montagui"),
aes(
x = mean_suit,
y = lat_bin,
color = scenario
)) +
geom_pointrange(aes(ymin = mean_suit,
ymax = mean_suit),
size = 1) +
geom_segment(
data = arrow_data %>% filter(species == "Chthamalus montagui"),
aes(x = current, xend = future, y = lat_bin, yend = lat_bin),
arrow = arrow(length = unit(0.2, "cm")),
color = "grey50"
) +
theme_bw() +
theme(strip.text = element_text(face = "italic"),
legend.position = "right") +
labs(
x = "Suitability score",
y = "Latitude range",
color = "Climate scenario"
) +
scale_color_manual(
name = "Climate scenario",
values = c("suitability_current" = current_colour,
"suitability_future" = future_colour),
labels = c("suitability_current" = "Current",
"suitability_future" = "Future (2100)")
) +
scale_y_discrete(
labels = c(
"(49.9,52]" = "49.9 to 52°N",
"(52,54.2]" = "52 to 54.2°N",
"(54.2,56.4]" = "54.2 to 56.4°N",
"(56.4,58.6]" = "56.4 to 58.6°N",
"(58.6,60.7]" = "58.6 to 60.7°N"
)) + xlim(0,1)
Lat_bin_line_plot_CM# Left-aligned species labels
sb_label <- ggdraw() +
draw_label("Semibalanus balanoides",
fontface = "italic",
size = 14,
hjust = 0, # left-aligned
x = 0.02) # move slightly from edge
cm_label <- ggdraw() +
draw_label("Chthamalus montagui",
fontface = "italic",
size = 14,
hjust = 0,
x = 0.02)
#top row
row_sb <- plot_grid(
SB_diff_plot_future,
Lat_bin_line_plot_SB,
ncol = 2,
labels = c("a", "b"),
align = "hv"
)
#bottom row
row_cm <- plot_grid(
CM_diff_plot_future,
Lat_bin_line_plot_CM,
ncol = 2,
labels = c("c", "d"),
align = "hv"
)
# Combine rows with labels (same layout as before)
forecast_plot_2 <- plot_grid(
sb_label, row_sb,
cm_label, row_cm,
ncol = 1,
rel_heights = c(0.08, 1, 0.08, 1),
rel_widths = c(0.3, 1)
)
forecast_plot_2# Save
ggsave(forecast_plot_2, filename = "Figures/forecast_plot2.png", width = 12, height = 8, dpi = 300)Plots for write-up
Now let’s make our final comparison plots for the write-up. These mainly consist of boxpots.
Evaluation plots
#semibalanus
eval.semi.bal.individual.visual <- read.csv("Data/Processed_Data/Semi.bal/Individual-GBIF-GBIF/Evaluations_All.csv", row.names = 1)
eval.semi.bal.individual.visual$model_type = "individual_algo"
eval.semi.bal.individual.visual$dataset = "GBIF + GBIF"
eval.semi.bal.individual.visual$species = "Semibalanus balanoides"
eval.semi.bal.ensemble.visual <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-GBIF-GBIF/Evaluations_All.csv", row.names = 1)
eval.semi.bal.ensemble.visual$model_type = "ensemble"
eval.semi.bal.ensemble.visual$dataset = "GBIF + GBIF"
eval.semi.bal.ensemble.visual$species = "Semibalanus balanoides"
eval.semi.bal.individual.combined <- read.csv("Data/Processed_Data/Semi.bal/Individual-GBIF-eDNA/Evaluations_All.csv", row.names = 1)
eval.semi.bal.individual.combined$model_type = "individual_algo"
eval.semi.bal.individual.combined$dataset = "GBIF-eDNA"
eval.semi.bal.individual.combined$species = "Semibalanus balanoides"
eval.semi.bal.ensemble.combined <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-GBIF-eDNA/Evaluations_All.csv", row.names = 1)
eval.semi.bal.ensemble.combined$model_type = "ensemble"
eval.semi.bal.ensemble.combined$dataset = "GBIF-eDNA"
eval.semi.bal.ensemble.combined$species = "Semibalanus balanoides"
eval.semi.bal.individual.baseline <- read.csv("Data/Processed_Data/Semi.bal/Individual-baseline/Evaluations_All.csv", row.names = 1)
eval.semi.bal.individual.baseline$model_type = "individual_algo"
eval.semi.bal.individual.baseline$dataset = "Baseline"
eval.semi.bal.individual.baseline$species = "Semibalanus balanoides"
eval.semi.bal.ensemble.baseline <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-baseline/Evaluations_All.csv", row.names = 1)
eval.semi.bal.ensemble.baseline$model_type = "ensemble"
eval.semi.bal.ensemble.baseline$dataset = "Baseline"
eval.semi.bal.ensemble.baseline$species = "Semibalanus balanoides"
# chthamalus
eval.chth.mon.individual.visual <- read.csv("Data/Processed_Data/Chtham.mont/Individual-GBIF-GBIF/Evaluations_All.csv", row.names = 1)
eval.chth.mon.individual.visual$model_type = "individual_algo"
eval.chth.mon.individual.visual$dataset = "GBIF + GBIF"
eval.chth.mon.individual.visual$species = "Chthamalus montagui"
eval.chth.mon.ensemble.visual <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-GBIF-GBIF/Evaluations_All.csv", row.names = 1)
eval.chth.mon.ensemble.visual$model_type = "ensemble"
eval.chth.mon.ensemble.visual$dataset = "GBIF + GBIF"
eval.chth.mon.ensemble.visual$species = "Chthamalus montagui"
eval.chth.mon.individual.combined <- read.csv("Data/Processed_Data/Chtham.mont/Individual-GBIF-eDNA/Evaluations_All.csv", row.names = 1)
eval.chth.mon.individual.combined$model_type = "individual_algo"
eval.chth.mon.individual.combined$dataset = "GBIF-eDNA"
eval.chth.mon.individual.combined$species = "Chthamalus montagui"
eval.chth.mon.ensemble.combined <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-GBIF-eDNA/Evaluations_All.csv", row.names = 1)
eval.chth.mon.ensemble.combined$model_type = "ensemble"
eval.chth.mon.ensemble.combined$dataset = "GBIF-eDNA"
eval.chth.mon.ensemble.combined$species = "Chthamalus montagui"
eval.chth.mon.individual.baseline <- read.csv("Data/Processed_Data/Chtham.mont/Individual-baseline/Evaluations_All.csv", row.names = 1)
eval.chth.mon.individual.baseline$model_type = "individual_algo"
eval.chth.mon.individual.baseline$dataset = "Baseline"
eval.chth.mon.individual.baseline$species = "Chthamalus montagui"
eval.chth.mon.ensemble.baseline <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-baseline/Evaluations_All.csv", row.names = 1)
eval.chth.mon.ensemble.baseline$model_type = "ensemble"
eval.chth.mon.ensemble.baseline$dataset = "Baseline"
eval.chth.mon.ensemble.baseline$species = "Chthamalus montagui"
#join
neworder <- c("Semibalanus balanoides","Chthamalus montagui")
full_evals_ensemble<- rbind(eval.semi.bal.ensemble.visual,
eval.semi.bal.ensemble.combined,
eval.semi.bal.ensemble.baseline,
eval.chth.mon.ensemble.visual,
eval.chth.mon.ensemble.combined,
eval.chth.mon.ensemble.baseline)
full_evals_individual<- rbind(eval.semi.bal.individual.visual,
eval.semi.bal.individual.combined,
eval.semi.bal.individual.baseline,
eval.chth.mon.individual.visual,
eval.chth.mon.individual.combined,
eval.chth.mon.individual.baseline)
full_evals_individual <- arrange(transform(full_evals_individual,
species=factor(species,levels=neworder)),species)
# filter for mean only in ensemble
full_evals_ensemble_mean <- full_evals_ensemble %>% filter(algo == "EMmean")Let’s compare the datasets across the evaluation metrics.
GBIF_only_colour = "#FA230F"
GBIF_eDNA_colour = "#345995"
baseline_colour = "#EAC435"eval_boxplot_ensemble <- ggplot(full_evals_ensemble_mean, aes(x = metric.eval, y = evaluation, fill = dataset)) +
geom_boxplot() +
theme_bw() +
facet_wrap(~factor(species, neworder)) +
scale_fill_manual(values = c(
"GBIF + GBIF" = GBIF_only_colour,
"GBIF-eDNA" = GBIF_eDNA_colour,
"Baseline" = baseline_colour
)) +
labs(x = "Metric", y = "Evaluation value", fill = "Dataset") +
theme(legend.position = "none")
eval_boxplot_ensembleggsave(eval_boxplot_ensemble, file = "Figures/eval_boxplot_ensemble.png", height = 5, width = 8, dpi = 300)eval_boxplot_individual<- ggplot(full_evals_individual, aes(x = metric.eval, y = evaluation, fill = dataset)) +
geom_boxplot() +
theme_bw() +
facet_wrap(~factor(species, neworder)) +
scale_fill_manual(values = c(
"GBIF + GBIF" = GBIF_only_colour,
"GBIF-eDNA" = GBIF_eDNA_colour,
"Baseline" = baseline_colour
)) +
labs(x = "Metric", y = "Evaluation value", fill = "Dataset")
eval_boxplot_individualggsave(eval_boxplot_individual, file = "Figures/eval_boxplot_individual.png", height = 5, width = 8, dpi = 300)eval_boxplot_together <- plot_grid(eval_boxplot_individual, eval_boxplot_ensemble, labels = c("a", "b"), ncol = 1, align = "hv")
eval_boxplot_togetherggsave(eval_boxplot_together, file = "Figures/eval_boxplot_together.png", height = 7, width = 7, dpi = 300)eval_boxplot_individual_models<- ggplot(full_evals_individual, aes(x = metric.eval, y = evaluation, fill = algo)) +
geom_boxplot() +
theme_bw() +
facet_wrap(~factor(species, neworder)) +
labs(x = "Metric", y = "Evaluation value", fill = "Dataset")
eval_boxplot_individual_modelsggsave(eval_boxplot_individual_models, file = "Figures/eval_boxplot_individual_models.png", height = 5, width = 8, dpi = 300)eval_boxplot_individual_models_data<- ggplot(full_evals_individual, aes(x = metric.eval, y = evaluation, fill = algo, colour = dataset)) +
geom_boxplot() +
theme_bw() +
facet_wrap(~factor(species, neworder)) +
labs(x = "Metric", y = "Evaluation value", fill = "Algo")
eval_boxplot_individual_models_dataggsave(eval_boxplot_individual_models_data, file = "Figures/eval_boxplot_individual_models_data.png", height = 5, width = 8, dpi = 300)eval_boxplot_individual_models_data_TSS <- ggplot(
full_evals_individual %>% filter(metric.eval == "TSS"),
aes(x = algo, y = evaluation, fill = dataset)
) +
geom_boxplot() +
theme_bw() +
facet_wrap( ~ factor(species, neworder)) +
labs(x = "Algorithm", y = "TSS", fill = "Dataset") +
scale_fill_manual(values = c(
"GBIF + GBIF" = GBIF_only_colour,
"GBIF-eDNA" = GBIF_eDNA_colour,
"Baseline" = baseline_colour
)) +
geom_hline(yintercept=0.4, linetype='dashed', col = 'red')
eval_boxplot_individual_models_data_TSSeval_boxplot_individual_models_data_ROC <- ggplot(
full_evals_individual %>% filter(metric.eval == "ROC"),
aes(x = algo, y = evaluation, fill = dataset)
) +
geom_boxplot() +
theme_bw() +
facet_wrap( ~ factor(species, neworder)) +
labs(x = "Algorithm", y = "ROC", fill = "Dataset")+
scale_fill_manual(values = c(
"GBIF + GBIF" = GBIF_only_colour,
"GBIF-eDNA" = GBIF_eDNA_colour,
"Baseline" = baseline_colour
)) +
geom_hline(yintercept=0.7, linetype='dashed', col = 'red')
eval_boxplot_individual_models_data_ROCeval_boxplot_together_algo <- plot_grid(eval_boxplot_individual_models_data_ROC, eval_boxplot_individual_models_data_TSS, labels = c("a", "b"), ncol = 1)
eval_boxplot_together_algoggsave(eval_boxplot_together_algo, file = "Figures/eval_boxplot_together_algo.png", height = 8, width = 8, dpi = 300)Let’s compare ensemble vs individual.
full_evals_individual_reduced <- full_evals_individual %>% select(c(full.name, evaluation, dataset, species, metric.eval, model_type))
full_evals_ensemble_mean <- full_evals_ensemble_mean %>% select(c(full.name, evaluation, dataset, species, metric.eval, model_type))
eval_reduced_joined <- rbind(full_evals_individual_reduced, full_evals_ensemble_mean)
#summary data
summary_eval <- eval_reduced_joined %>%
group_by(dataset, model_type, metric.eval,species) %>%
summarise(mean = mean(evaluation),
se = sd(evaluation)/sqrt(n()))
summary_eval# A tibble: 24 × 6
# Groups: dataset, model_type, metric.eval [12]
dataset model_type metric.eval species mean se
<chr> <chr> <chr> <chr> <dbl> <dbl>
1 Baseline ensemble ROC Chthamalus montagui 0.834 0
2 Baseline ensemble ROC Semibalanus balanoides 0.688 0.0145
3 Baseline ensemble TSS Chthamalus montagui 0.557 0
4 Baseline ensemble TSS Semibalanus balanoides 0.343 0.0110
5 Baseline individual_algo ROC Chthamalus montagui 0.818 0.0109
6 Baseline individual_algo ROC Semibalanus balanoides 0.595 0.0229
7 Baseline individual_algo TSS Chthamalus montagui 0.580 0.0196
8 Baseline individual_algo TSS Semibalanus balanoides 0.248 0.0302
9 GBIF + GBIF ensemble ROC Chthamalus montagui 0.848 0
10 GBIF + GBIF ensemble ROC Semibalanus balanoides 0.669 0.0155
# ℹ 14 more rows
eval_boxplot_join_data_TSS <- ggplot(
eval_reduced_joined %>% filter(metric.eval == "TSS"),
aes(x = model_type, y = evaluation, fill = dataset)
) +
geom_boxplot() +
theme_bw() +
facet_wrap( ~ factor(species, neworder)) +
labs(x = "Model type", y = "TSS", fill = "Dataset") +
scale_fill_manual(values = c(
"GBIF + GBIF" = GBIF_only_colour,
"GBIF-eDNA" = GBIF_eDNA_colour,
"Baseline" = baseline_colour
))+
scale_x_discrete(labels = c("Ensemble", "Individual")) +
theme(legend.position = "none")
eval_boxplot_join_data_TSSeval_boxplot_join_data_ROC <- ggplot(
eval_reduced_joined %>% filter(metric.eval == "ROC"),
aes(x = model_type, y = evaluation, fill = dataset)
) +
geom_boxplot() +
theme_bw() +
facet_wrap( ~ factor(species, neworder)) +
labs(x = "Model type", y = "ROC", fill = "Dataset") +
scale_fill_manual(values = c(
"GBIF + GBIF" = GBIF_only_colour,
"GBIF-eDNA" = GBIF_eDNA_colour,
"Baseline" = baseline_colour
)) +
scale_x_discrete(labels = c("Ensemble", "Individual"))
eval_boxplot_join_data_ROCeval_boxplot_together_both <- plot_grid(eval_boxplot_join_data_ROC, eval_boxplot_join_data_TSS, labels = c("a", "b"), ncol = 1, align = "hv")
eval_boxplot_together_bothggsave(eval_boxplot_together_both, file = "Figures/eval_boxplot_together_both.png", height = 8, width = 8, dpi = 300)mean_range_plot_mods_TSS <- ggplot(
summary_eval %>%
filter(metric.eval == "TSS") %>%
mutate(dataset = factor(
dataset, levels = c("Baseline", "GBIF + GBIF", "GBIF-eDNA")
)),
aes(x = model_type, y = mean, colour = dataset)
) +
geom_pointrange(aes(ymin = mean - (se*2), ymax = mean + (se*2)),
size = 1.5,
position = position_dodge(width = 0.4)) + # dodge by dataset
labs(x = "Model type", y = "TSS", colour = "Dataset") +
facet_wrap( ~ factor(species, neworder)) +
scale_x_discrete(
limits = c("individual_algo", "ensemble"),
labels = c("Individual", "Ensemble")
) + # reorder x-axis
scale_colour_manual(
values = c("GBIF + GBIF" = GBIF_only_colour,
"GBIF-eDNA" = GBIF_eDNA_colour,
"Baseline" = baseline_colour),
breaks = c("Baseline", "GBIF + GBIF", "GBIF-eDNA"), # reorder datasets
labels = c("GBIF + GBIF" = "GBIF + GBIF",
"GBIF-eDNA" = "GBIF + eDNA",
"Baseline" = "GBIF baseline")
) +
theme_bw() +
theme(text = element_text(size = 13), legend.position = "none",
strip.text = element_text(face = "italic"))
mean_range_plot_mods_TSSmean_range_plot_mods_roc <- ggplot(
summary_eval %>%
filter(metric.eval == "ROC") %>%
mutate(dataset = factor(
dataset, levels = c("Baseline", "GBIF + GBIF", "GBIF-eDNA")
)),
aes(x = model_type, y = mean, colour = dataset)
) +
geom_pointrange(aes(ymin = mean - (se*2), ymax = mean + (se*2)),
size = 1.5,
position = position_dodge(width = 0.4)) + # dodge by dataset
labs(x = "Model type", y = "ROC", colour = "Dataset") +
facet_wrap( ~ factor(species, neworder)) +
scale_x_discrete(
limits = c("individual_algo", "ensemble"),
labels = c("Individual", "Ensemble")
) + # reorder x-axis
scale_colour_manual(
values = c("GBIF + GBIF" = GBIF_only_colour,
"GBIF-eDNA" = GBIF_eDNA_colour,
"Baseline" = baseline_colour),
breaks = c("Baseline", "GBIF + GBIF", "GBIF-eDNA"),# reorder datasets
labels = c("GBIF + GBIF" = "GBIF baseline + GBIF additional",
"GBIF-eDNA" = "GBIF baseline + eDNA",
"Baseline" = "GBIF baseline")
) +
theme_bw() +
theme(text = element_text(size = 13),
strip.text = element_text(face = "italic"))
mean_range_plot_mods_roceval_boxplot_together_both_meanSE <- plot_grid(mean_range_plot_mods_roc, mean_range_plot_mods_TSS, labels = c("a", "b"), ncol = 1, align = "hv")
eval_boxplot_together_both_meanSEggsave(eval_boxplot_together_both_meanSE, file = "Figures/eval_boxplot_together_both_meanSE.png", height = 10, width = 11, dpi = 300)summary_eval2 <- eval_reduced_joined %>%
group_by(dataset, model_type, metric.eval) %>%
summarise(mean = mean(evaluation),
se = sd(evaluation)/sqrt(n()))
summary_eval2# A tibble: 12 × 5
# Groups: dataset, model_type [6]
dataset model_type metric.eval mean se
<chr> <chr> <chr> <dbl> <dbl>
1 Baseline ensemble ROC 0.761 0.0427
2 Baseline ensemble TSS 0.45 0.0619
3 Baseline individual_algo ROC 0.706 0.0236
4 Baseline individual_algo TSS 0.414 0.0347
5 GBIF + GBIF ensemble ROC 0.758 0.0522
6 GBIF + GBIF ensemble TSS 0.464 0.0686
7 GBIF + GBIF individual_algo ROC 0.703 0.0239
8 GBIF + GBIF individual_algo TSS 0.404 0.0345
9 GBIF-eDNA ensemble ROC 0.756 0.0505
10 GBIF-eDNA ensemble TSS 0.459 0.0523
11 GBIF-eDNA individual_algo ROC 0.706 0.0247
12 GBIF-eDNA individual_algo TSS 0.408 0.0340
Variable importance plots
#semibalanus
var.semi.bal.individual.visual <- read.csv("Data/Processed_Data/Semi.bal/Individual-GBIF-GBIF/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.individual.visual$model_type = "individual_algo"
var.semi.bal.individual.visual$dataset = "GBIF + GBIF"
var.semi.bal.individual.visual$species = "Semibalanus balanoides"
var.semi.bal.ensemble.visual <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-GBIF-GBIF/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.ensemble.visual$model_type = "ensemble"
var.semi.bal.ensemble.visual$dataset = "GBIF + GBIF"
var.semi.bal.ensemble.visual$species = "Semibalanus balanoides"
var.semi.bal.individual.combined <- read.csv("Data/Processed_Data/Semi.bal/Individual-GBIF-eDNA/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.individual.combined$model_type = "individual_algo"
var.semi.bal.individual.combined$dataset = "GBIF-eDNA"
var.semi.bal.individual.combined$species = "Semibalanus balanoides"
var.semi.bal.ensemble.combined <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-GBIF-eDNA/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.ensemble.combined$model_type = "ensemble"
var.semi.bal.ensemble.combined$dataset = "GBIF-eDNA"
var.semi.bal.ensemble.combined$species = "Semibalanus balanoides"
var.semi.bal.individual.baseline <- read.csv("Data/Processed_Data/Semi.bal/Individual-baseline/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.individual.baseline$model_type = "individual_algo"
var.semi.bal.individual.baseline$dataset = "Baseline"
var.semi.bal.individual.baseline$species = "Semibalanus balanoides"
var.semi.bal.ensemble.baseline <- read.csv("Data/Processed_Data/Semi.bal/Ensemble-baseline/VariablesImportance_All.csv", row.names = 1)
var.semi.bal.ensemble.baseline$model_type = "ensemble"
var.semi.bal.ensemble.baseline$dataset = "Baseline"
var.semi.bal.ensemble.baseline$species = "Semibalanus balanoides"
# chthamalus
var.chth.mon.individual.visual <- read.csv("Data/Processed_Data/Chtham.mont/Individual-GBIF-GBIF/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.individual.visual$model_type = "individual_algo"
var.chth.mon.individual.visual$dataset = "GBIF + GBIF"
var.chth.mon.individual.visual$species = "Chthamalus montagui"
var.chth.mon.ensemble.visual <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-GBIF-GBIF/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.ensemble.visual$model_type = "ensemble"
var.chth.mon.ensemble.visual$dataset = "GBIF + GBIF"
var.chth.mon.ensemble.visual$species = "Chthamalus montagui"
var.chth.mon.individual.combined <- read.csv("Data/Processed_Data/Chtham.mont/Individual-GBIF-eDNA/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.individual.combined$model_type = "individual_algo"
var.chth.mon.individual.combined$dataset = "GBIF-eDNA"
var.chth.mon.individual.combined$species = "Chthamalus montagui"
var.chth.mon.ensemble.combined <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-GBIF-eDNA/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.ensemble.combined$model_type = "ensemble"
var.chth.mon.ensemble.combined$dataset = "GBIF-eDNA"
var.chth.mon.ensemble.combined$species = "Chthamalus montagui"
var.chth.mon.individual.baseline <- read.csv("Data/Processed_Data/Chtham.mont/Individual-baseline/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.individual.baseline$model_type = "individual_algo"
var.chth.mon.individual.baseline$dataset = "Baseline"
var.chth.mon.individual.baseline$species = "Chthamalus montagui"
var.chth.mon.ensemble.baseline <- read.csv("Data/Processed_Data/Chtham.mont/Ensemble-baseline/VariablesImportance_All.csv", row.names = 1)
var.chth.mon.ensemble.baseline$model_type = "ensemble"
var.chth.mon.ensemble.baseline$dataset = "Baseline"
var.chth.mon.ensemble.baseline$species = "Chthamalus montagui"
#join
neworder <- c("Semibalanus balanoides","Chthamalus montagui")
full_vars_ensemble<- rbind(var.semi.bal.ensemble.visual,
var.semi.bal.ensemble.combined,
var.semi.bal.ensemble.baseline,
var.chth.mon.ensemble.visual,
var.chth.mon.ensemble.combined,
var.chth.mon.ensemble.baseline)
full_vars_individual<- rbind(var.semi.bal.individual.visual,
var.semi.bal.individual.combined,
var.semi.bal.individual.baseline,
var.chth.mon.individual.visual,
var.chth.mon.individual.combined,
var.chth.mon.individual.baseline)
full_vars_individual <- arrange(transform(full_vars_individual,
species=factor(species,levels=neworder)),species)
# filter for mean only in ensemble
full_vars_ensemble_mean <- full_vars_ensemble %>% filter(algo == "EMmean")vars_boxplot_ensemble<- ggplot(full_vars_ensemble_mean %>%
mutate(dataset = factor(
dataset, levels = c("Baseline", "GBIF + GBIF", "GBIF-eDNA")
)), aes(x = expl.var, y = var.imp, fill = dataset)) +
geom_boxplot() +
theme_bw() +
facet_wrap(~factor(species, neworder)) +
scale_fill_manual(
values = c(
"GBIF + GBIF" = GBIF_only_colour,
"GBIF-eDNA" = GBIF_eDNA_colour,
"Baseline" = baseline_colour
),
breaks = c("Baseline", "GBIF + GBIF", "GBIF-eDNA"), # reorder datasets
labels = c(
"GBIF + GBIF" = "GBIF baseline + GBIF additional",
"GBIF-eDNA" = "GBIF baseline + eDNA",
"Baseline" = "GBIF baseline"
)
) +
labs(x = "Environmental variable", y = "Variable importance", fill = "Dataset")+
scale_x_discrete(labels = c("SST", "pH", "Wave fetch"))+
theme(strip.text = element_text(face = "italic"),
legend.position = "bottom")
vars_boxplot_ensembleggsave(vars_boxplot_ensemble, file = "Figures/vars_boxplot_ensemble.png", height = 5, width = 6, dpi = 300)vars_boxplot_individual <- ggplot(
full_vars_individual %>%
mutate(dataset = factor(
dataset, levels = c("Baseline", "GBIF + GBIF", "GBIF-eDNA")
)),
aes(x = expl.var, y = var.imp, fill = dataset)
) +
geom_boxplot() +
theme_bw() +
facet_grid(algo ~ factor(species, neworder)) +
labs(x = "Environmental variable", y = "Variable importance", fill = "Dataset") +
scale_fill_manual(
values = c(
"GBIF + GBIF" = GBIF_only_colour,
"GBIF-eDNA" = GBIF_eDNA_colour,
"Baseline" = baseline_colour
),
breaks = c("Baseline", "GBIF + GBIF", "GBIF-eDNA"), # reorder datasets
labels = c(
"GBIF + GBIF" = "GBIF baseline + GBIF additional",
"GBIF-eDNA" = "GBIF baseline + eDNA",
"Baseline" = "GBIF baseline"
)
) +
scale_x_discrete(labels = c("SST", "pH", "Wave fetch")) +
theme(strip.text = element_text(face = "italic"),
legend.position = "bottom")
vars_boxplot_individualggsave(vars_boxplot_individual, file = "Figures/vars_boxplot_individual.png", height = 9, width = 7, dpi = 300)Response curve pots
SB (GBIF + GBIF)
# Semibalanus - GBIF + GBIF
response_mean_SB_visual <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_GBIF,
models.chosen = "all",
fixed.var = 'mean')tab_mean_SB_visual <- response_mean_SB_visual$tab
response_mean_SB_visual_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_GBIF,
models.chosen = "all",
fixed.var = 'mean')tab_mean_SB_visual_ensemble <- response_mean_SB_visual_ensemble$tab
tab_mean_SB_visual_ensemble_reduced <- tab_mean_SB_visual_ensemble %>%
filter(pred.name == "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo")
tab_mean_SB_visual_reduced <- tab_mean_SB_visual %>%
filter(pred.name == c("Semibalanus.balanoides_allData_allRun_GLM",
"Semibalanus.balanoides_allData_allRun_MAXENT",
"Semibalanus.balanoides_allData_allRun_RF",
"Semibalanus.balanoides_allData_allRun_GAM"))
tab_mean_SB_visual_reduced_add <- rbind(tab_mean_SB_visual_reduced, tab_mean_SB_visual_ensemble_reduced)
plot_mean_SB_visual <- ggplot(
tab_mean_SB_visual_reduced_add,
aes(x = expl.val, y = pred.val, color = pred.name)
) +
geom_line(linewidth = 1) +
facet_wrap(
~expl.name,
scales = "free_x",
labeller = labeller(expl.name = c(
"ocean_temp" = "SST",
"ph" = "pH",
"wave_fetch" = "Wave fetch"
))
) +
scale_color_manual(
values = c("Semibalanus.balanoides_allData_allRun_GAM" = "blue",
"Semibalanus.balanoides_allData_allRun_GLM" = "red",
"Semibalanus.balanoides_allData_allRun_RF" = "green",
"Semibalanus.balanoides_allData_allRun_MAXENT" = "purple",
"Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
labels = c("Semibalanus.balanoides_allData_allRun_GAM" = "GAM",
"Semibalanus.balanoides_allData_allRun_GLM" = "GLM",
"Semibalanus.balanoides_allData_allRun_RF" = "RF",
"Semibalanus.balanoides_allData_allRun_MAXENT" = "MAXENT",
"Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
name = "Model"
) +
labs(x = "Predictor", y = "Response") +
theme_bw()
plot_mean_SB_visualSB (GBIF + eDNA)
# Semibalanus - GBIF + eDNA
response_mean_SB_combined <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA,
models.chosen = "all",
fixed.var = 'mean')tab_mean_SB_combined <- response_mean_SB_combined$tab
response_mean_SB_combined_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_eDNA,
models.chosen = "all",
fixed.var = 'mean')tab_mean_SB_combined_ensemble <- response_mean_SB_combined_ensemble$tab
tab_mean_SB_combined_ensemble_reduced <- tab_mean_SB_combined_ensemble %>%
filter(pred.name == "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo")
tab_mean_SB_combined_reduced <- tab_mean_SB_combined %>%
filter(pred.name == c("Semibalanus.balanoides_allData_allRun_GLM",
"Semibalanus.balanoides_allData_allRun_MAXENT",
"Semibalanus.balanoides_allData_allRun_RF",
"Semibalanus.balanoides_allData_allRun_GAM"))
tab_mean_SB_combined_reduced_add <- rbind(tab_mean_SB_combined_reduced, tab_mean_SB_combined_ensemble_reduced)
plot_mean_SB_combined <- ggplot(
tab_mean_SB_combined_reduced_add,
aes(x = expl.val, y = pred.val, color = pred.name)
) +
geom_line(linewidth = 1) +
facet_wrap(
~expl.name,
scales = "free_x",
labeller = labeller(expl.name = c(
"ocean_temp" = "SST",
"ph" = "pH",
"wave_fetch" = "Wave fetch"
))
) +
scale_color_manual(
values = c("Semibalanus.balanoides_allData_allRun_GAM" = "blue",
"Semibalanus.balanoides_allData_allRun_GLM" = "red",
"Semibalanus.balanoides_allData_allRun_RF" = "green",
"Semibalanus.balanoides_allData_allRun_MAXENT" = "purple",
"Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
labels = c("Semibalanus.balanoides_allData_allRun_GAM" = "GAM",
"Semibalanus.balanoides_allData_allRun_GLM" = "GLM",
"Semibalanus.balanoides_allData_allRun_RF" = "RF",
"Semibalanus.balanoides_allData_allRun_MAXENT" = "MAXENT",
"Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
name = "Model"
) +
labs(x = "Predictor", y = "Response") +
theme_bw()+
theme(legend.position = "none")
plot_mean_SB_combinedSB (GBIF baseline)
# Semibalanus -baseline
response_mean_SB_baseline <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_semibalanus_GBIF_eDNA,
models.chosen = "all",
fixed.var = 'mean')tab_mean_SB_baseline <- response_mean_SB_baseline$tab
response_mean_SB_baseline_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_semibalanus_GBIF_eDNA,
models.chosen = "all",
fixed.var = 'mean')tab_mean_SB_baseline_ensemble <- response_mean_SB_baseline_ensemble$tab
tab_mean_SB_baseline_ensemble_reduced <- tab_mean_SB_baseline_ensemble %>%
filter(pred.name == "Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo")
tab_mean_SB_baseline_reduced <- tab_mean_SB_baseline %>%
filter(pred.name == c("Semibalanus.balanoides_allData_allRun_GLM",
"Semibalanus.balanoides_allData_allRun_MAXENT",
"Semibalanus.balanoides_allData_allRun_RF",
"Semibalanus.balanoides_allData_allRun_GAM"))
tab_mean_SB_baseline_reduced_add <- rbind(tab_mean_SB_baseline_reduced, tab_mean_SB_baseline_ensemble_reduced)
plot_mean_SB_baseline <- ggplot(
tab_mean_SB_baseline_reduced_add,
aes(x = expl.val, y = pred.val, color = pred.name)
) +
geom_line(linewidth = 1) +
facet_wrap(
~expl.name,
scales = "free_x",
labeller = labeller(expl.name = c(
"ocean_temp" = "SST",
"ph" = "pH",
"wave_fetch" = "Wave fetch"
))
) +
scale_color_manual(
values = c("Semibalanus.balanoides_allData_allRun_GAM" = "blue",
"Semibalanus.balanoides_allData_allRun_GLM" = "red",
"Semibalanus.balanoides_allData_allRun_RF" = "green",
"Semibalanus.balanoides_allData_allRun_MAXENT" = "purple",
"Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
labels = c("Semibalanus.balanoides_allData_allRun_GAM" = "GAM",
"Semibalanus.balanoides_allData_allRun_GLM" = "GLM",
"Semibalanus.balanoides_allData_allRun_RF" = "RF",
"Semibalanus.balanoides_allData_allRun_MAXENT" = "MAXENT",
"Semibalanus.balanoides_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
name = "Model"
) +
labs(x = "Predictor", y = "Response") +
theme_bw()+
theme(legend.position = "none")
plot_mean_SB_baselineCM (GBIF + GBIF)
# Chthamalus - GBIF + GBIF
response_mean_CM_visual <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF,
models.chosen = "all",
fixed.var = 'mean')tab_mean_CM_visual <- response_mean_CM_visual$tab
response_mean_CM_visual_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_GBIF,
models.chosen = "all",
fixed.var = 'mean')tab_mean_CM_visual_ensemble <- response_mean_CM_visual_ensemble$tab
tab_mean_CM_visual_ensemble_reduced <- tab_mean_CM_visual_ensemble %>%
filter(pred.name == "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo")
tab_mean_CM_visual_reduced <- tab_mean_CM_visual %>%
filter(pred.name == c("Chthamalus.montagui_allData_allRun_GLM",
"Chthamalus.montagui_allData_allRun_MAXENT",
"Chthamalus.montagui_allData_allRun_RF",
"Chthamalus.montagui_allData_allRun_GAM"))
tab_mean_CM_visual_reduced_add <- rbind(tab_mean_CM_visual_reduced, tab_mean_CM_visual_ensemble_reduced)
plot_mean_CM_visual <- ggplot(
tab_mean_CM_visual_reduced_add,
aes(x = expl.val, y = pred.val, color = pred.name)
) +
geom_line(linewidth = 1) +
facet_wrap(
~expl.name,
scales = "free_x",
labeller = labeller(expl.name = c(
"ocean_temp" = "SST",
"ph" = "pH",
"wave_fetch" = "Wave fetch"
))
) +
scale_color_manual(
values = c("Chthamalus.montagui_allData_allRun_GAM" = "blue",
"Chthamalus.montagui_allData_allRun_GLM" = "red",
"Chthamalus.montagui_allData_allRun_RF" = "green",
"Chthamalus.montagui_allData_allRun_MAXENT" = "purple",
"Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
labels = c("Chthamalus.montagui_allData_allRun_GAM" = "GAM",
"Chthamalus.montagui_allData_allRun_GLM" = "GLM",
"Chthamalus.montagui_allData_allRun_RF" = "RF",
"Chthamalus.montagui_allData_allRun_MAXENT" = "MAXENT",
"Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
name = "Model"
) +
labs(x = "Predictor", y = "Response") +
theme_bw()
plot_mean_CM_visualCM (GBIF + eDNA)
# Chthamalus - GBIF + eDNA
response_mean_CM_combined <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA,
models.chosen = "all",
fixed.var = 'mean')tab_mean_CM_combined <- response_mean_CM_combined$tab
response_mean_CM_combined_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_eDNA,
models.chosen = "all",
fixed.var = 'mean')tab_mean_CM_combined_ensemble <- response_mean_CM_combined_ensemble$tab
tab_mean_CM_combined_ensemble_reduced <- tab_mean_CM_combined_ensemble %>%
filter(pred.name == "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo")
tab_mean_CM_combined_reduced <- tab_mean_CM_combined %>%
filter(pred.name == c("Chthamalus.montagui_allData_allRun_GLM",
"Chthamalus.montagui_allData_allRun_MAXENT",
"Chthamalus.montagui_allData_allRun_RF",
"Chthamalus.montagui_allData_allRun_GAM"))
tab_mean_CM_combined_reduced_add <- rbind(tab_mean_CM_combined_reduced, tab_mean_CM_combined_ensemble_reduced)
plot_mean_CM_combined <- ggplot(
tab_mean_CM_combined_reduced_add,
aes(x = expl.val, y = pred.val, color = pred.name)
) +
geom_line(linewidth = 1) +
facet_wrap(
~expl.name,
scales = "free_x",
labeller = labeller(expl.name = c(
"ocean_temp" = "SST",
"ph" = "pH",
"wave_fetch" = "Wave fetch"
))
) +
scale_color_manual(
values = c("Chthamalus.montagui_allData_allRun_GAM" = "blue",
"Chthamalus.montagui_allData_allRun_GLM" = "red",
"Chthamalus.montagui_allData_allRun_RF" = "green",
"Chthamalus.montagui_allData_allRun_MAXENT" = "purple",
"Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
labels = c("Chthamalus.montagui_allData_allRun_GAM" = "GAM",
"Chthamalus.montagui_allData_allRun_GLM" = "GLM",
"Chthamalus.montagui_allData_allRun_RF" = "RF",
"Chthamalus.montagui_allData_allRun_MAXENT" = "MAXENT",
"Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
name = "Model"
) +
labs(x = "Predictor", y = "Response") +
theme_bw() +
theme(legend.position = "none")
plot_mean_CM_combinedCM (GBIF baseline)
# Chthamalus - baseline
response_mean_CM_baseline <- bm_PlotResponseCurves(bm.out = myBiomodModelOut_chthamalus_GBIF_eDNA,
models.chosen = "all",
fixed.var = 'mean')tab_mean_CM_baseline <- response_mean_CM_baseline$tab
response_mean_CM_baseline_ensemble <- bm_PlotResponseCurves(bm.out = myBiomodEM_chthamalus_GBIF_eDNA,
models.chosen = "all",
fixed.var = 'mean')tab_mean_CM_baseline_ensemble <- response_mean_CM_baseline_ensemble$tab
tab_mean_CM_baseline_ensemble_reduced <- tab_mean_CM_baseline_ensemble %>%
filter(pred.name == "Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo")
tab_mean_CM_baseline_reduced <- tab_mean_CM_baseline %>%
filter(pred.name == c("Chthamalus.montagui_allData_allRun_GLM",
"Chthamalus.montagui_allData_allRun_MAXENT",
"Chthamalus.montagui_allData_allRun_RF",
"Chthamalus.montagui_allData_allRun_GAM"))
tab_mean_CM_baseline_reduced_add <- rbind(tab_mean_CM_baseline_reduced, tab_mean_CM_baseline_ensemble_reduced)
plot_mean_CM_baseline <- ggplot(
tab_mean_CM_baseline_reduced_add,
aes(x = expl.val, y = pred.val, color = pred.name)
) +
geom_line(linewidth = 1) +
facet_wrap(
~expl.name,
scales = "free_x",
labeller = labeller(expl.name = c(
"ocean_temp" = "SST",
"ph" = "pH",
"wave_fetch" = "Wave fetch"
))
) +
scale_color_manual(
values = c("Chthamalus.montagui_allData_allRun_GAM" = "blue",
"Chthamalus.montagui_allData_allRun_GLM" = "red",
"Chthamalus.montagui_allData_allRun_RF" = "green",
"Chthamalus.montagui_allData_allRun_MAXENT" = "purple",
"Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "black"),
labels = c("Chthamalus.montagui_allData_allRun_GAM" = "GAM",
"Chthamalus.montagui_allData_allRun_GLM" = "GLM",
"Chthamalus.montagui_allData_allRun_RF" = "RF",
"Chthamalus.montagui_allData_allRun_MAXENT" = "MAXENT",
"Chthamalus.montagui_EMmeanByROC_mergedData_mergedRun_mergedAlgo" = "Ensemble"),
name = "Model"
) +
labs(x = "Predictor", y = "Response") +
theme_bw() +
theme(legend.position = "none")
plot_mean_CM_baselinePlot together
response_curves_together_SB <- plot_grid(plot_mean_SB_baseline,
plot_mean_SB_visual,
plot_mean_SB_combined,
ncol = 1,
align = "hv")
response_curves_together_SBggsave(response_curves_together_SB, file = "Figures/response_curves_together_SB.png", height = 12, width = 10, dpi = 300)
response_curves_together_CM <- plot_grid(plot_mean_CM_baseline,
plot_mean_CM_visual,
plot_mean_CM_combined,
labels = c("a", "b", "c"),
ncol = 1,
align = "hv")
response_curves_together_CMggsave(response_curves_together_CM, file = "Figures/response_curves_together_CM.png", height = 12, width = 10, dpi = 300)Concluding remarks
We have now produced all the analyses for chapter 4.